From c787f1baa5031c22cbe20af17b2ee36ad32957ea Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:34 -0500 Subject: block: Add PR callouts for read keys and reservation Add callouts for reading keys and reservations. This allows LIO to support the READ_KEYS and READ_RESERVATION commands so it can export devices to VMs for software like windows clustering. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-2-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/linux/pr.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pr.h b/include/linux/pr.h index 94ceec713afe..3003daec28a5 100644 --- a/include/linux/pr.h +++ b/include/linux/pr.h @@ -4,6 +4,18 @@ #include +struct pr_keys { + u32 generation; + u32 num_keys; + u64 keys[]; +}; + +struct pr_held_reservation { + u64 key; + u32 generation; + enum pr_type type; +}; + struct pr_ops { int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags); @@ -14,6 +26,19 @@ struct pr_ops { int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort); int (*pr_clear)(struct block_device *bdev, u64 key); + /* + * pr_read_keys - Read the registered keys and return them in the + * pr_keys->keys array. The keys array will have been allocated at the + * end of the pr_keys struct, and pr_keys->num_keys must be set to the + * number of keys the array can hold. If there are more than can fit + * in the array, success will still be returned and pr_keys->num_keys + * will reflect the total number of keys the device contains, so the + * caller can retry with a larger array. + */ + int (*pr_read_keys)(struct block_device *bdev, + struct pr_keys *keys_info); + int (*pr_read_reservation)(struct block_device *bdev, + struct pr_held_reservation *rsv); }; #endif /* LINUX_PR_H */ -- cgit v1.2.3 From 7ba150834b840f6f5cdd07ca69a4ccf39df59a66 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:35 -0500 Subject: block: Rename BLK_STS_NEXUS to BLK_STS_RESV_CONFLICT BLK_STS_NEXUS is used for NVMe/SCSI reservation conflicts and DASD's locking feature which works similar to NVMe/SCSI reservations where a host can get a lock on a device and when the lock is taken it will get failures. This patch renames BLK_STS_NEXUS so it better reflects this type of use. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-3-michael.christie@oracle.com Acked-by: Stefan Haberland Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- block/blk-core.c | 2 +- drivers/nvme/host/core.c | 2 +- drivers/s390/block/dasd.c | 7 ++++++- drivers/scsi/scsi_lib.c | 2 +- include/linux/blk_types.h | 4 ++-- 5 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 9e5e0277a4d9..ff8fb7a49389 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -155,7 +155,7 @@ static const struct { [BLK_STS_NOSPC] = { -ENOSPC, "critical space allocation" }, [BLK_STS_TRANSPORT] = { -ENOLINK, "recoverable transport" }, [BLK_STS_TARGET] = { -EREMOTEIO, "critical target" }, - [BLK_STS_NEXUS] = { -EBADE, "critical nexus" }, + [BLK_STS_RESV_CONFLICT] = { -EBADE, "reservation conflict" }, [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, [BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c2730b116dc6..535a26ceb205 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -279,7 +279,7 @@ static blk_status_t nvme_error_status(u16 status) case NVME_SC_INVALID_PI: return BLK_STS_PROTECTION; case NVME_SC_RESERVATION_CONFLICT: - return BLK_STS_NEXUS; + return BLK_STS_RESV_CONFLICT; case NVME_SC_HOST_PATH_ERROR: return BLK_STS_TRANSPORT; case NVME_SC_ZONE_TOO_MANY_ACTIVE: diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a9c2a8d76c45..ca0df87fa8f4 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2723,7 +2723,12 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) else if (status == 0) { switch (cqr->intrc) { case -EPERM: - error = BLK_STS_NEXUS; + /* + * DASD doesn't implement SCSI/NVMe reservations, but it + * implements a locking scheme similar to them. We + * return this error when we no longer have the lock. + */ + error = BLK_STS_RESV_CONFLICT; break; case -ENOLINK: error = BLK_STS_TRANSPORT; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b7c569a42aa4..e1468483ac7e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -599,7 +599,7 @@ static blk_status_t scsi_result_to_blk_status(int result) case SCSIML_STAT_OK: break; case SCSIML_STAT_RESV_CONFLICT: - return BLK_STS_NEXUS; + return BLK_STS_RESV_CONFLICT; case SCSIML_STAT_NOSPC: return BLK_STS_NOSPC; case SCSIML_STAT_MED_ERROR: diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 99be590f952f..2b2452086a2f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -96,7 +96,7 @@ typedef u16 blk_short_t; #define BLK_STS_NOSPC ((__force blk_status_t)3) #define BLK_STS_TRANSPORT ((__force blk_status_t)4) #define BLK_STS_TARGET ((__force blk_status_t)5) -#define BLK_STS_NEXUS ((__force blk_status_t)6) +#define BLK_STS_RESV_CONFLICT ((__force blk_status_t)6) #define BLK_STS_MEDIUM ((__force blk_status_t)7) #define BLK_STS_PROTECTION ((__force blk_status_t)8) #define BLK_STS_RESOURCE ((__force blk_status_t)9) @@ -184,7 +184,7 @@ static inline bool blk_path_error(blk_status_t error) case BLK_STS_NOTSUPP: case BLK_STS_NOSPC: case BLK_STS_TARGET: - case BLK_STS_NEXUS: + case BLK_STS_RESV_CONFLICT: case BLK_STS_MEDIUM: case BLK_STS_PROTECTION: return false; -- cgit v1.2.3 From f2bf2e7e2d526116aab942aaf1b71a949a570ba6 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:40 -0500 Subject: nvme: Fix reservation status related structs This fixes the following issues with the reservation status structs: 1. resv10 is bytes 23:10 so it should be 14 bytes. 
2. regctl_ds only supports 64 bit host IDs. These are not currently used, but will be in this patchset which adds support for the reservation report command. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-8-michael.christie@oracle.com Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- include/linux/nvme.h | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fad4aa245fb..57b5b2b8d95b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,20 +759,42 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +struct nvme_registered_ctrl { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 hostid; + __le64 rkey; +}; + struct nvme_reservation_status { __le32 gen; __u8 rtype; __u8 regctl[2]; __u8 resv5[2]; __u8 ptpls; - __u8 resv10[13]; - struct { - __le16 cntlid; - __u8 rcsts; - __u8 resv3[5]; - __le64 hostid; - __le64 rkey; - } regctl_ds[]; + __u8 resv10[14]; + struct nvme_registered_ctrl regctl_ds[]; +}; + +struct nvme_registered_ctrl_ext { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 rkey; + __u8 hostid[16]; + __u8 rsvd32[32]; +}; + +struct nvme_reservation_status_ext { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[14]; + __u8 rsvd24[40]; + struct nvme_registered_ctrl_ext regctl_eds[]; }; enum nvme_async_event_type { -- cgit v1.2.3 From 5fd96a4e15de8442915a912233d800c56f49001d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:44 -0500 Subject: nvme: Add pr_ops read_keys support This patch adds support for the pr_ops read_keys callout by calling the NVMe Reservation Report helper, then parsing that info to get the controller's registered keys. Because the callout is only used in the kernel where the callers, like LIO, do not know about controller/host IDs, the callout just returns the registered keys which is required by the SCSI PR in READ KEYS command. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-12-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/nvme/host/pr.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 4 +++ 2 files changed, 73 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index cd93d2e5b340..0ee656404437 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -154,10 +154,79 @@ static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } +static int nvme_pr_resv_report(struct block_device *bdev, void *data, + u32 data_len, bool *eds) +{ + struct nvme_command c = { }; + int ret; + + c.common.opcode = nvme_cmd_resv_report; + c.common.cdw10 = cpu_to_le32(nvme_bytes_to_numd(data_len)); + c.common.cdw11 = cpu_to_le32(NVME_EXTENDED_DATA_STRUCT); + *eds = true; + +retry: + ret = nvme_send_pr_command(bdev, &c, data, data_len); + if (ret == NVME_SC_HOST_ID_INCONSIST && + c.common.cdw11 == cpu_to_le32(NVME_EXTENDED_DATA_STRUCT)) { + c.common.cdw11 = 0; + *eds = false; + goto retry; + } + + if (ret < 0) + return ret; + + return nvme_sc_to_pr_err(ret); +} + +static int nvme_pr_read_keys(struct block_device *bdev, + struct pr_keys *keys_info) +{ + u32 rse_len, num_keys = keys_info->num_keys; + struct nvme_reservation_status_ext *rse; + int ret, i; + bool eds; + + /* + * Assume we are using 128-bit host IDs and allocate a buffer large + * enough to get enough keys to fill the return keys buffer. + */ + rse_len = struct_size(rse, regctl_eds, num_keys); + rse = kzalloc(rse_len, GFP_KERNEL); + if (!rse) + return -ENOMEM; + + ret = nvme_pr_resv_report(bdev, rse, rse_len, &eds); + if (ret) + goto free_rse; + + keys_info->generation = le32_to_cpu(rse->gen); + keys_info->num_keys = get_unaligned_le16(&rse->regctl); + + num_keys = min(num_keys, keys_info->num_keys); + for (i = 0; i < num_keys; i++) { + if (eds) { + keys_info->keys[i] = + le64_to_cpu(rse->regctl_eds[i].rkey); + } else { + struct nvme_reservation_status *rs; + + rs = (struct nvme_reservation_status *)rse; + keys_info->keys[i] = le64_to_cpu(rs->regctl_ds[i].rkey); + } + } + +free_rse: + kfree(rse); + return ret; +} + const struct pr_ops nvme_pr_ops = { .pr_register = nvme_pr_register, .pr_reserve = nvme_pr_reserve, .pr_release = nvme_pr_release, .pr_preempt = nvme_pr_preempt, .pr_clear = nvme_pr_clear, + .pr_read_keys = nvme_pr_read_keys, }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 57b5b2b8d95b..a617e250d629 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,6 +759,10 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +enum nvme_eds { + NVME_EXTENDED_DATA_STRUCT = 0x1, +}; + struct nvme_registered_ctrl { __le16 cntlid; __u8 rcsts; -- cgit v1.2.3 From be1a7cd2d0ed028ffdd60c65e3734e2a1d8b17df Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 7 Apr 2023 15:05:45 -0500 Subject: nvme: Add a nvme_pr_type enum The next patch adds support to report the reservation type, so we need to be able to convert from the NVMe PR value we get from the device to the linux block layer PR value that will be returned to callers. To prepare for that, this patch adds a nvme_pr_type enum and renames the nvme_pr_type function. Signed-off-by: Mike Christie Link: https://lore.kernel.org/r/20230407200551.12660-13-michael.christie@oracle.com Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. 
Petersen --- drivers/nvme/host/pr.c | 24 ++++++++++++------------ include/linux/nvme.h | 9 +++++++++ 2 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 0ee656404437..732c56b417c2 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -9,24 +9,24 @@ #include "nvme.h" -static char nvme_pr_type(enum pr_type type) +static enum nvme_pr_type nvme_pr_type_from_blk(enum pr_type type) { switch (type) { case PR_WRITE_EXCLUSIVE: - return 1; + return NVME_PR_WRITE_EXCLUSIVE; case PR_EXCLUSIVE_ACCESS: - return 2; + return NVME_PR_EXCLUSIVE_ACCESS; case PR_WRITE_EXCLUSIVE_REG_ONLY: - return 3; + return NVME_PR_WRITE_EXCLUSIVE_REG_ONLY; case PR_EXCLUSIVE_ACCESS_REG_ONLY: - return 4; + return NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY; case PR_WRITE_EXCLUSIVE_ALL_REGS: - return 5; + return NVME_PR_WRITE_EXCLUSIVE_ALL_REGS; case PR_EXCLUSIVE_ACCESS_ALL_REGS: - return 6; - default: - return 0; + return NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS; } + + return 0; } static int nvme_send_ns_head_pr_command(struct block_device *bdev, @@ -127,7 +127,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, if (flags & ~PR_FL_IGNORE_KEY) return -EOPNOTSUPP; - cdw10 = nvme_pr_type(type) << 8; + cdw10 = nvme_pr_type_from_blk(type) << 8; cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire); } @@ -135,7 +135,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); + u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -149,7 +149,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 0 : 1 << 3); + u32 cdw10 = nvme_pr_type_from_blk(type) << 8 | (key ? 0 : 1 << 3); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a617e250d629..4013abb86642 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,6 +759,15 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +enum nvme_pr_type { + NVME_PR_WRITE_EXCLUSIVE = 1, + NVME_PR_EXCLUSIVE_ACCESS = 2, + NVME_PR_WRITE_EXCLUSIVE_REG_ONLY = 3, + NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, + NVME_PR_WRITE_EXCLUSIVE_ALL_REGS = 5, + NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, +}; + enum nvme_eds { NVME_EXTENDED_DATA_STRUCT = 0x1, }; -- cgit v1.2.3 From 5fbcc6708fe32ef80122cd2a59ddca9d18b24d6e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 6 Apr 2023 15:21:06 +0200 Subject: video/aperture: Drop primary argument With the preceding patches it's become defunct. Also I'm about to add a different boolean argument, so it's better to keep the confusion down to the absolute minimum. v2: Since the hypervfb patch got droppped (it's only a pci device for gen1 vm, not for gen2) there is one leftover user in an actual driver left to touch. 
v4: - fixes to commit message - fix Daniel's S-o-b address v5: - add back an S-o-b tag with Daniel's Intel address Signed-off-by: Daniel Vetter Signed-off-by: Daniel Vetter Signed-off-by: Thomas Zimmermann Cc: Thomas Zimmermann Cc: Javier Martinez Canillas Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Wei Liu Cc: Dexuan Cui Cc: linux-hyperv@vger.kernel.org Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230406132109.32050-7-tzimmermann@suse.de --- drivers/gpu/drm/drm_aperture.c | 2 +- drivers/video/aperture.c | 7 +++---- drivers/video/fbdev/hyperv_fb.c | 2 +- include/linux/aperture.h | 9 ++++----- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/drm_aperture.c b/drivers/gpu/drm/drm_aperture.c index 697cffbfd603..5729f3bb4398 100644 --- a/drivers/gpu/drm/drm_aperture.c +++ b/drivers/gpu/drm/drm_aperture.c @@ -168,7 +168,7 @@ EXPORT_SYMBOL(devm_aperture_acquire_from_firmware); int drm_aperture_remove_conflicting_framebuffers(resource_size_t base, resource_size_t size, const struct drm_driver *req_driver) { - return aperture_remove_conflicting_devices(base, size, false, req_driver->name); + return aperture_remove_conflicting_devices(base, size, req_driver->name); } EXPORT_SYMBOL(drm_aperture_remove_conflicting_framebuffers); diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c index b378cd1d44d0..a0945027e0df 100644 --- a/drivers/video/aperture.c +++ b/drivers/video/aperture.c @@ -43,7 +43,7 @@ * base = mem->start; * size = resource_size(mem); * - * ret = aperture_remove_conflicting_devices(base, size, false, "example"); + * ret = aperture_remove_conflicting_devices(base, size, "example"); * if (ret) * return ret; * @@ -274,7 +274,6 @@ static void aperture_detach_devices(resource_size_t base, resource_size_t size) * aperture_remove_conflicting_devices - remove devices in the given range * @base: the aperture's base address in physical memory * @size: aperture size in bytes - * @primary: also kick vga16fb if present; only relevant for VGA devices * @name: a descriptive name of the requesting driver * * This function removes devices that own apertures within @base and @size. 
@@ -283,7 +282,7 @@ static void aperture_detach_devices(resource_size_t base, resource_size_t size) * 0 on success, or a negative errno code otherwise */ int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, - bool primary, const char *name) + const char *name) { /* * If a driver asked to unregister a platform device registered by @@ -329,7 +328,7 @@ int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *na base = pci_resource_start(pdev, bar); size = pci_resource_len(pdev, bar); - ret = aperture_remove_conflicting_devices(base, size, primary, name); + ret = aperture_remove_conflicting_devices(base, size, name); if (ret) return ret; } diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index ec3f6cf05f8c..54f433e09ab8 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -1073,7 +1073,7 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) info->screen_size = dio_fb_size; getmem_done: - aperture_remove_conflicting_devices(base, size, false, KBUILD_MODNAME); + aperture_remove_conflicting_devices(base, size, KBUILD_MODNAME); if (gen2vm) { /* framebuffer is reallocated, clear screen_info to avoid misuse from kexec */ diff --git a/include/linux/aperture.h b/include/linux/aperture.h index 442f15a57cad..7248727753be 100644 --- a/include/linux/aperture.h +++ b/include/linux/aperture.h @@ -14,7 +14,7 @@ int devm_aperture_acquire_for_platform_device(struct platform_device *pdev, resource_size_t size); int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, - bool primary, const char *name); + const char *name); int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name); #else @@ -26,7 +26,7 @@ static inline int devm_aperture_acquire_for_platform_device(struct platform_devi } static inline int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, - bool primary, const char *name) + const char *name) { return 0; } @@ -39,7 +39,6 @@ static inline int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, /** * aperture_remove_all_conflicting_devices - remove all existing framebuffers - * @primary: also kick vga16fb if present; only relevant for VGA devices * @name: a descriptive name of the requesting driver * * This function removes all graphics device drivers. Use this function on systems @@ -48,9 +47,9 @@ static inline int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, * Returns: * 0 on success, or a negative errno code otherwise */ -static inline int aperture_remove_all_conflicting_devices(bool primary, const char *name) +static inline int aperture_remove_all_conflicting_devices(const char *name) { - return aperture_remove_conflicting_devices(0, (resource_size_t)-1, primary, name); + return aperture_remove_conflicting_devices(0, (resource_size_t)-1, name); } #endif -- cgit v1.2.3 From 116b1c5a364bcbdc40be64d4f3ec9dbc32e264dd Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 6 Apr 2023 15:21:09 +0200 Subject: video/aperture: Provide a VGA helper for gma500 and internal use The hardware for gma500 is different from the rest, as it uses stolen framebuffer memory that is not available via PCI BAR. The regular PCI removal helper cannot detect the framebuffer, while the non-PCI helper misses possible conflicting VGA devices (i.e., a framebuffer or text console). Gma500 therefore calls both helpers to catch all cases. 
It's confusing as it implies that there's something about the PCI device that requires ownership management. The relationship between the PCI device and the VGA devices is non-obvious. At worst, readers might assume that calling two functions for clearing aperture ownership is a bug in the driver. Hence, move the PCI removal helper's code for VGA functionality into a separate function and call this function from gma500. Documents the purpose of each call to aperture helpers. The change contains comments and example code form the discussion at [1]. v5: * fix grammar in gma500 comment (Javier) Signed-off-by: Thomas Zimmermann Link: https://patchwork.kernel.org/project/dri-devel/patch/20230404201842.567344-1-daniel.vetter@ffwll.ch/ # 1 Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20230406132109.32050-10-tzimmermann@suse.de --- drivers/gpu/drm/gma500/psb_drv.c | 48 +++++++++++++++++++++++---------- drivers/video/aperture.c | 58 +++++++++++++++++++++++++++------------- include/linux/aperture.h | 7 +++++ 3 files changed, 81 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index 4bb06a89e48d..8b64f61ffaf9 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -7,6 +7,7 @@ * **************************************************************************/ +#include #include #include #include @@ -19,7 +20,6 @@ #include #include -#include #include #include #include @@ -414,25 +414,45 @@ out_err: return ret; } -static int psb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +/* + * Hardware for gma500 is a hybrid device, which both acts as a PCI + * device (for legacy vga functionality) but also more like an + * integrated display on a SoC where the framebuffer simply + * resides in main memory and not in a special PCI bar (that + * internally redirects to a stolen range of main memory) like all + * other integrated PCI display devices implement it. + * + * To catch all cases we need to remove conflicting firmware devices + * for the stolen system memory and for the VGA functionality. As we + * currently cannot easily find the framebuffer's location in stolen + * memory, we remove all framebuffers here. + * + * TODO: Refactor psb_driver_load() to map vdc_reg earlier. Then + * we might be able to read the framebuffer range from the + * device. + */ +static int gma_remove_conflicting_framebuffers(struct pci_dev *pdev, + const struct drm_driver *req_driver) { - struct drm_psb_private *dev_priv; - struct drm_device *dev; + resource_size_t base = 0; + resource_size_t size = U32_MAX; /* 4 GiB HW limit */ + const char *name = req_driver->name; int ret; - /* - * We cannot yet easily find the framebuffer's location in memory. So - * remove all framebuffers here. Note that we still want the pci special - * handling to kick out vgacon. - * - * TODO: Refactor psb_driver_load() to map vdc_reg earlier. Then we - * might be able to read the framebuffer range from the device. 
- */ - ret = drm_aperture_remove_framebuffers(&driver); + ret = aperture_remove_conflicting_devices(base, size, name); if (ret) return ret; - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver); + return __aperture_remove_legacy_vga_devices(pdev); +} + +static int psb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct drm_psb_private *dev_priv; + struct drm_device *dev; + int ret; + + ret = gma_remove_conflicting_framebuffers(pdev, &driver); if (ret) return ret; diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c index fa71f8257eed..561be8feca96 100644 --- a/drivers/video/aperture.c +++ b/drivers/video/aperture.c @@ -301,6 +301,37 @@ int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t si } EXPORT_SYMBOL(aperture_remove_conflicting_devices); +/** + * __aperture_remove_legacy_vga_devices - remove legacy VGA devices of a PCI devices + * @pdev: PCI device + * + * This function removes VGA devices provided by @pdev, such as a VGA + * framebuffer or a console. This is useful if you have a VGA-compatible + * PCI graphics device with framebuffers in non-BAR locations. Drivers + * should acquire ownership of those memory areas and afterwards call + * this helper to release remaining VGA devices. + * + * If your hardware has its framebuffers accessible via PCI BARS, use + * aperture_remove_conflicting_pci_devices() instead. The function will + * release any VGA devices automatically. + * + * WARNING: Apparently we must remove graphics drivers before calling + * this helper. Otherwise the vga fbdev driver falls over if + * we have vgacon configured. + * + * Returns: + * 0 on success, or a negative errno code otherwise + */ +int __aperture_remove_legacy_vga_devices(struct pci_dev *pdev) +{ + /* VGA framebuffer */ + aperture_detach_devices(VGA_FB_PHYS_BASE, VGA_FB_PHYS_SIZE); + + /* VGA textmode console */ + return vga_remove_vgacon(pdev); +} +EXPORT_SYMBOL(__aperture_remove_legacy_vga_devices); + /** * aperture_remove_conflicting_pci_devices - remove existing framebuffers for PCI devices * @pdev: PCI device @@ -317,7 +348,7 @@ int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *na { bool primary = false; resource_size_t base, size; - int bar, ret; + int bar, ret = 0; if (pdev == vga_default_device()) primary = true; @@ -334,24 +365,15 @@ int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *na aperture_detach_devices(base, size); } - if (primary) { - /* - * If this is the primary adapter, there could be a VGA device - * that consumes the VGA framebuffer I/O range. Remove this - * device as well. - */ - aperture_detach_devices(VGA_FB_PHYS_BASE, VGA_FB_PHYS_SIZE); - - /* - * WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. - */ - ret = vga_remove_vgacon(pdev); - if (ret) - return ret; - } + /* + * If this is the primary adapter, there could be a VGA device + * that consumes the VGA framebuffer I/O range. Remove this + * device as well. 
+ */ + if (primary) + ret = __aperture_remove_legacy_vga_devices(pdev); - return 0; + return ret; } EXPORT_SYMBOL(aperture_remove_conflicting_pci_devices); diff --git a/include/linux/aperture.h b/include/linux/aperture.h index 7248727753be..1a9a88b11584 100644 --- a/include/linux/aperture.h +++ b/include/linux/aperture.h @@ -16,6 +16,8 @@ int devm_aperture_acquire_for_platform_device(struct platform_device *pdev, int aperture_remove_conflicting_devices(resource_size_t base, resource_size_t size, const char *name); +int __aperture_remove_legacy_vga_devices(struct pci_dev *pdev); + int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name); #else static inline int devm_aperture_acquire_for_platform_device(struct platform_device *pdev, @@ -31,6 +33,11 @@ static inline int aperture_remove_conflicting_devices(resource_size_t base, reso return 0; } +static inline int __aperture_remove_legacy_vga_devices(struct pci_dev *pdev) +{ + return 0; +} + static inline int aperture_remove_conflicting_pci_devices(struct pci_dev *pdev, const char *name) { return 0; -- cgit v1.2.3 From 26662d7347a058ca497792c4b22ac91cc415cbf6 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 20 Apr 2023 00:14:12 -0700 Subject: bpf: Add bpf_dynptr_size bpf_dynptr_size returns the number of usable bytes in a dynptr. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230420071414.570108-4-joannelkoong@gmail.com --- include/linux/bpf.h | 2 +- kernel/bpf/helpers.c | 15 ++++++++++++--- kernel/trace/bpf_trace.c | 4 ++-- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e53ceee1df37..456f33b9d205 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1197,7 +1197,7 @@ enum bpf_dynptr_type { }; int bpf_dynptr_check_size(u32 size); -u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); +u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a683b3e71a28..c465e97733b9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1443,7 +1443,7 @@ static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *pt return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT; } -u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr) +u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_SIZE_MASK; } @@ -1476,7 +1476,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) { - u32 size = bpf_dynptr_get_size(ptr); + u32 size = __bpf_dynptr_size(ptr); if (len > size || offset > size - len) return -E2BIG; @@ -2311,7 +2311,7 @@ __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 en if (!ptr->data || start > end) return -EINVAL; - size = bpf_dynptr_get_size(ptr); + size = __bpf_dynptr_size(ptr); if (start > size || end > size) return -ERANGE; @@ -2335,6 +2335,14 @@ __bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) return __bpf_dynptr_is_rdonly(ptr); } +__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +{ + if (!ptr->data) + return -EINVAL; + + return __bpf_dynptr_size(ptr); +} + __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj) { return 
obj; @@ -2410,6 +2418,7 @@ BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) +BTF_ID_FLAGS(func, bpf_dynptr_size) BTF_SET8_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bcf91bc7bf71..8deb22a99abe 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1349,9 +1349,9 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, } return verify_pkcs7_signature(data_ptr->data, - bpf_dynptr_get_size(data_ptr), + __bpf_dynptr_size(data_ptr), sig_ptr->data, - bpf_dynptr_get_size(sig_ptr), + __bpf_dynptr_size(sig_ptr), trusted_keyring->key, VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); -- cgit v1.2.3 From 7b9c13dd4d0ebbe89dfd7e1ecd09696037622101 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Mon, 1 May 2023 17:39:45 -0700 Subject: Input: i8042 - add missing include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit include uses ENODEV when included with !IS_ENABLED(CONFIG_SERIO_I8042) and so need to include it. Signed-off-by: Michał Mirosław Link: https://lore.kernel.org/r/49fd4d400d1ab62095e5ed75a6637f883c0d071b.1682795105.git.mirq-linux@rere.qmqm.pl Signed-off-by: Dmitry Torokhov --- include/linux/i8042.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 0261e2fb3636..95b07f8b77fe 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -3,6 +3,7 @@ #define _LINUX_I8042_H +#include #include /* -- cgit v1.2.3 From 0cce06ba859a515bd06224085d3addb870608b6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Apr 2023 17:03:13 +0200 Subject: debugobjects,locking: Annotate debug_object_fill_pool() wait type violation There is an explicit wait-type violation in debug_object_fill_pool() for PREEMPT_RT=n kernels which allows them to more easily fill the object pool and reduce the chance of allocation failures. Lockdep's wait-type checks are designed to check the PREEMPT_RT locking rules even for PREEMPT_RT=n kernels and object to this, so create a lockdep annotation to allow this to stand. Specifically, create a 'lock' type that overrides the inner wait-type while it is held -- allowing one to temporarily raise it, such that the violation is hidden. Reported-by: Vlastimil Babka Reported-by: Qi Zheng Signed-off-by: Peter Zijlstra (Intel) Tested-by: Qi Zheng Link: https://lkml.kernel.org/r/20230429100614.GA1489784@hirez.programming.kicks-ass.net --- include/linux/lockdep.h | 14 ++++++++++++++ include/linux/lockdep_types.h | 1 + kernel/locking/lockdep.c | 28 +++++++++++++++++++++------- lib/debugobjects.c | 15 +++++++++++++-- 4 files changed, 49 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1023f349af71..a3329fb49b33 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -339,6 +339,16 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) #define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) +/* + * Must use lock_map_aquire_try() with override maps to avoid + * lockdep thinking they participate in the block chain. 
+ */ +#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ + struct lockdep_map _name = { \ + .name = #_name "-wait-type-override", \ + .wait_type_inner = _wait_type, \ + .lock_type = LD_LOCK_WAIT_OVERRIDE, } + #else /* !CONFIG_LOCKDEP */ static inline void lockdep_init_task(struct task_struct *task) @@ -427,6 +437,9 @@ extern int lockdep_is_held(const void *); #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) +#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ + struct lockdep_map __maybe_unused _name = {} + #endif /* !LOCKDEP */ enum xhlock_context_t { @@ -551,6 +564,7 @@ do { \ #define rwsem_release(l, i) lock_release(l, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) +#define lock_map_acquire_try(l) lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_) #define lock_map_release(l) lock_release(l, _THIS_IP_) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index d22430840b53..59f4fb1626ea 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -33,6 +33,7 @@ enum lockdep_wait_type { enum lockdep_lock_type { LD_LOCK_NORMAL = 0, /* normal, catch all */ LD_LOCK_PERCPU, /* percpu */ + LD_LOCK_WAIT_OVERRIDE, /* annotation */ LD_LOCK_MAX, }; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 50d4863974e7..62ef295e07e6 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2253,6 +2253,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask) static inline bool usage_skip(struct lock_list *entry, void *mask) { + if (entry->class->lock_type == LD_LOCK_NORMAL) + return false; + /* * Skip local_lock() for irq inversion detection. * @@ -2279,14 +2282,16 @@ static inline bool usage_skip(struct lock_list *entry, void *mask) * As a result, we will skip local_lock(), when we search for irq * inversion bugs. */ - if (entry->class->lock_type == LD_LOCK_PERCPU) { - if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG)) - return false; + if (entry->class->lock_type == LD_LOCK_PERCPU && + DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG)) + return false; - return true; - } + /* + * Skip WAIT_OVERRIDE for irq inversion detection -- it's not actually + * a lock and only used to override the wait_type. + */ - return false; + return true; } /* @@ -4752,7 +4757,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) for (; depth < curr->lockdep_depth; depth++) { struct held_lock *prev = curr->held_locks + depth; - u8 prev_inner = hlock_class(prev)->wait_type_inner; + struct lock_class *class = hlock_class(prev); + u8 prev_inner = class->wait_type_inner; if (prev_inner) { /* @@ -4762,6 +4768,14 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) * Also due to trylocks. */ curr_inner = min(curr_inner, prev_inner); + + /* + * Allow override for annotations -- this is typically + * only valid/needed for code that only exists when + * CONFIG_PREEMPT_RT=n. 
+ */ + if (unlikely(class->lock_type == LD_LOCK_WAIT_OVERRIDE)) + curr_inner = prev_inner; } } diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 003edc5ebd67..826c617b10a7 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -591,10 +591,21 @@ static void debug_objects_fill_pool(void) { /* * On RT enabled kernels the pool refill must happen in preemptible - * context: + * context -- for !RT kernels we rely on the fact that spinlock_t and + * raw_spinlock_t are basically the same type and this lock-type + * inversion works just fine. */ - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) { + /* + * Annotate away the spinlock_t inside raw_spinlock_t warning + * by temporarily raising the wait-type to WAIT_SLEEP, matching + * the preemptible() condition above. + */ + static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP); + lock_map_acquire_try(&fill_pool_map); fill_pool(); + lock_map_release(&fill_pool_map); + } } static void -- cgit v1.2.3 From 407958a0e980b9e1842ab87b5a1040521e1e24e9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 May 2023 21:33:10 -0700 Subject: bpf: encapsulate precision backtracking bookkeeping Add struct backtrack_state and straightforward API around it to keep track of register and stack masks used and maintained during precision backtracking process. Having this logic separately allow to keep high-level backtracking algorithm cleaner, but also it sets us up to cleanly keep track of register and stack masks per frame, allowing (with some further logic adjustments) to perform precision backpropagation across multiple frames (i.e., subprog calls). Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230505043317.3629845-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 14 +++ kernel/bpf/verifier.c | 249 +++++++++++++++++++++++++++++++------------ 2 files changed, 196 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3dd29a53b711..33f541366f4e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -238,6 +238,10 @@ enum bpf_stack_slot_type { #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +#define BPF_REGMASK_ARGS ((1 << BPF_REG_1) | (1 << BPF_REG_2) | \ + (1 << BPF_REG_3) | (1 << BPF_REG_4) | \ + (1 << BPF_REG_5)) + #define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) #define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) @@ -541,6 +545,15 @@ struct bpf_subprog_info { bool is_async_cb; }; +struct bpf_verifier_env; + +struct backtrack_state { + struct bpf_verifier_env *env; + u32 frame; + u32 reg_masks[MAX_CALL_FRAMES]; + u64 stack_masks[MAX_CALL_FRAMES]; +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -578,6 +591,7 @@ struct bpf_verifier_env { int *insn_stack; int cur_stack; } cfg; + struct backtrack_state bt; u32 pass_cnt; /* number of times do_check() was called */ u32 subprog_cnt; /* number of instructions analyzed by the verifier */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index da8a5834f2ca..9b2e571250e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1296,6 +1296,12 @@ static bool is_spilled_reg(const struct bpf_stack_state *stack) return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; } +static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack) +{ + return stack->slot_type[BPF_REG_SIZE - 1] == 
STACK_SPILL && + stack->spilled_ptr.type == SCALAR_VALUE; +} + static void scrub_spilled_slot(u8 *stype) { if (*stype != STACK_INVALID) @@ -3186,12 +3192,128 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) return btf_name_by_offset(desc_btf, func->name_off); } +static inline void bt_init(struct backtrack_state *bt, u32 frame) +{ + bt->frame = frame; +} + +static inline void bt_reset(struct backtrack_state *bt) +{ + struct bpf_verifier_env *env = bt->env; + + memset(bt, 0, sizeof(*bt)); + bt->env = env; +} + +static inline u32 bt_empty(struct backtrack_state *bt) +{ + u64 mask = 0; + int i; + + for (i = 0; i <= bt->frame; i++) + mask |= bt->reg_masks[i] | bt->stack_masks[i]; + + return mask == 0; +} + +static inline int bt_subprog_enter(struct backtrack_state *bt) +{ + if (bt->frame == MAX_CALL_FRAMES - 1) { + verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + bt->frame++; + return 0; +} + +static inline int bt_subprog_exit(struct backtrack_state *bt) +{ + if (bt->frame == 0) { + verbose(bt->env, "BUG subprog exit from frame 0\n"); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + bt->frame--; + return 0; +} + +static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) +{ + bt->reg_masks[frame] |= 1 << reg; +} + +static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) +{ + bt->reg_masks[frame] &= ~(1 << reg); +} + +static inline void bt_set_reg(struct backtrack_state *bt, u32 reg) +{ + bt_set_frame_reg(bt, bt->frame, reg); +} + +static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg) +{ + bt_clear_frame_reg(bt, bt->frame, reg); +} + +static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) +{ + bt->stack_masks[frame] |= 1ull << slot; +} + +static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) +{ + bt->stack_masks[frame] &= ~(1ull << slot); +} + +static inline void bt_set_slot(struct backtrack_state *bt, u32 slot) +{ + bt_set_frame_slot(bt, bt->frame, slot); +} + +static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot) +{ + bt_clear_frame_slot(bt, bt->frame, slot); +} + +static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame) +{ + return bt->reg_masks[frame]; +} + +static inline u32 bt_reg_mask(struct backtrack_state *bt) +{ + return bt->reg_masks[bt->frame]; +} + +static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame) +{ + return bt->stack_masks[frame]; +} + +static inline u64 bt_stack_mask(struct backtrack_state *bt) +{ + return bt->stack_masks[bt->frame]; +} + +static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) +{ + return bt->reg_masks[bt->frame] & (1 << reg); +} + +static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot) +{ + return bt->stack_masks[bt->frame] & (1ull << slot); +} + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. 
*/ static int backtrack_insn(struct bpf_verifier_env *env, int idx, - u32 *reg_mask, u64 *stack_mask) + struct backtrack_state *bt) { const struct bpf_insn_cbs cbs = { .cb_call = disasm_kfunc_name, @@ -3202,20 +3324,20 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, u8 class = BPF_CLASS(insn->code); u8 opcode = BPF_OP(insn->code); u8 mode = BPF_MODE(insn->code); - u32 dreg = 1u << insn->dst_reg; - u32 sreg = 1u << insn->src_reg; + u32 dreg = insn->dst_reg; + u32 sreg = insn->src_reg; u32 spi; if (insn->code == 0) return 0; if (env->log.level & BPF_LOG_LEVEL2) { - verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); + verbose(env, "regs=%x stack=%llx before ", bt_reg_mask(bt), bt_stack_mask(bt)); verbose(env, "%d: ", idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); } if (class == BPF_ALU || class == BPF_ALU64) { - if (!(*reg_mask & dreg)) + if (!bt_is_reg_set(bt, dreg)) return 0; if (opcode == BPF_MOV) { if (BPF_SRC(insn->code) == BPF_X) { @@ -3223,8 +3345,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * dreg needs precision after this insn * sreg needs precision before this insn */ - *reg_mask &= ~dreg; - *reg_mask |= sreg; + bt_clear_reg(bt, dreg); + bt_set_reg(bt, sreg); } else { /* dreg = K * dreg needs precision after this insn. @@ -3232,7 +3354,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * as precise=true in this verifier state. * No further markings in parent are necessary */ - *reg_mask &= ~dreg; + bt_clear_reg(bt, dreg); } } else { if (BPF_SRC(insn->code) == BPF_X) { @@ -3240,15 +3362,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * both dreg and sreg need precision * before this insn */ - *reg_mask |= sreg; + bt_set_reg(bt, sreg); } /* else dreg += K * dreg still needs precision before this insn */ } } else if (class == BPF_LDX) { - if (!(*reg_mask & dreg)) + if (!bt_is_reg_set(bt, dreg)) return 0; - *reg_mask &= ~dreg; + bt_clear_reg(bt, dreg); /* scalars can only be spilled into stack w/o losing precision. * Load from any other memory can be zero extended. @@ -3269,9 +3391,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - *stack_mask |= 1ull << spi; + bt_set_slot(bt, spi); } else if (class == BPF_STX || class == BPF_ST) { - if (*reg_mask & dreg) + if (bt_is_reg_set(bt, dreg)) /* stx & st shouldn't be using _scalar_ dst_reg * to access memory. It means backtracking * encountered a case of pointer subtraction. @@ -3286,11 +3408,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - if (!(*stack_mask & (1ull << spi))) + if (!bt_is_slot_set(bt, spi)) return 0; - *stack_mask &= ~(1ull << spi); + bt_clear_slot(bt, spi); if (class == BPF_STX) - *reg_mask |= sreg; + bt_set_reg(bt, sreg); } else if (class == BPF_JMP || class == BPF_JMP32) { if (opcode == BPF_CALL) { if (insn->src_reg == BPF_PSEUDO_CALL) @@ -3307,19 +3429,19 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) return -ENOTSUPP; /* regular helper call sets R0 */ - *reg_mask &= ~1; - if (*reg_mask & 0x3f) { + bt_clear_reg(bt, BPF_REG_0); + if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { /* if backtracing was looking for registers R1-R5 * they should have been found already. 
*/ - verbose(env, "BUG regs %x\n", *reg_mask); + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } } else if (opcode == BPF_EXIT) { return -ENOTSUPP; } else if (BPF_SRC(insn->code) == BPF_X) { - if (!(*reg_mask & (dreg | sreg))) + if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg)) return 0; /* dreg sreg * Both dreg and sreg need precision before @@ -3327,7 +3449,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * before it would be equally necessary to * propagate it to dreg. */ - *reg_mask |= (sreg | dreg); + bt_set_reg(bt, dreg); + bt_set_reg(bt, sreg); /* else dreg K * Only dreg still needs precision before * this insn, so for the K-based conditional @@ -3335,9 +3458,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ } } else if (class == BPF_LD) { - if (!(*reg_mask & dreg)) + if (!bt_is_reg_set(bt, dreg)) return 0; - *reg_mask &= ~dreg; + bt_clear_reg(bt, dreg); /* It's ld_imm64 or ld_abs or ld_ind. * For ld_imm64 no further tracking of precision * into parent is necessary @@ -3550,20 +3673,21 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno, int spi) { + struct backtrack_state *bt = &env->bt; struct bpf_verifier_state *st = env->cur_state; int first_idx = st->first_insn_idx; int last_idx = env->insn_idx; struct bpf_func_state *func; struct bpf_reg_state *reg; - u32 reg_mask = regno >= 0 ? 1u << regno : 0; - u64 stack_mask = spi >= 0 ? 1ull << spi : 0; bool skip_first = true; - bool new_marks = false; int i, err; if (!env->bpf_capable) return 0; + /* set frame number from which we are starting to backtrack */ + bt_init(bt, frame); + /* Do sanity checks against current state of register and/or stack * slot, but don't set precise flag in current state, as precision * tracking in the current state is unnecessary. 
@@ -3575,26 +3699,17 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r WARN_ONCE(1, "backtracing misuse"); return -EFAULT; } - new_marks = true; + bt_set_reg(bt, regno); } while (spi >= 0) { - if (!is_spilled_reg(&func->stack[spi])) { - stack_mask = 0; + if (!is_spilled_scalar_reg(&func->stack[spi])) break; - } - reg = &func->stack[spi].spilled_ptr; - if (reg->type != SCALAR_VALUE) { - stack_mask = 0; - break; - } - new_marks = true; + bt_set_slot(bt, spi); break; } - if (!new_marks) - return 0; - if (!reg_mask && !stack_mask) + if (bt_empty(bt)) return 0; for (;;) { @@ -3613,12 +3728,13 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r if (st->curframe == 0 && st->frame[0]->subprogno > 0 && st->frame[0]->callsite == BPF_MAIN_FUNC && - stack_mask == 0 && (reg_mask & ~0x3e) == 0) { - bitmap_from_u64(mask, reg_mask); + bt_stack_mask(bt) == 0 && + (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) { + bitmap_from_u64(mask, bt_reg_mask(bt)); for_each_set_bit(i, mask, 32) { reg = &st->frame[0]->regs[i]; if (reg->type != SCALAR_VALUE) { - reg_mask &= ~(1u << i); + bt_clear_reg(bt, i); continue; } reg->precise = true; @@ -3626,8 +3742,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r return 0; } - verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n", - st->frame[0]->subprogno, reg_mask, stack_mask); + verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n", + st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt)); WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } @@ -3637,15 +3753,16 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r err = 0; skip_first = false; } else { - err = backtrack_insn(env, i, ®_mask, &stack_mask); + err = backtrack_insn(env, i, bt); } if (err == -ENOTSUPP) { mark_all_scalars_precise(env, st); + bt_reset(bt); return 0; } else if (err) { return err; } - if (!reg_mask && !stack_mask) + if (bt_empty(bt)) /* Found assignment(s) into tracked register in this state. * Since this state is already marked, just return. * Nothing to be tracked further in the parent state. @@ -3670,21 +3787,21 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r if (!st) break; - new_marks = false; func = st->frame[frame]; - bitmap_from_u64(mask, reg_mask); + bitmap_from_u64(mask, bt_reg_mask(bt)); for_each_set_bit(i, mask, 32) { reg = &func->regs[i]; if (reg->type != SCALAR_VALUE) { - reg_mask &= ~(1u << i); + bt_clear_reg(bt, i); continue; } - if (!reg->precise) - new_marks = true; - reg->precise = true; + if (reg->precise) + bt_clear_reg(bt, i); + else + reg->precise = true; } - bitmap_from_u64(mask, stack_mask); + bitmap_from_u64(mask, bt_stack_mask(bt)); for_each_set_bit(i, mask, 64) { if (i >= func->allocated_stack / BPF_REG_SIZE) { /* the sequence of instructions: @@ -3701,32 +3818,28 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r * In such case fallback to conservative. 
*/ mark_all_scalars_precise(env, st); + bt_reset(bt); return 0; } - if (!is_spilled_reg(&func->stack[i])) { - stack_mask &= ~(1ull << i); + if (!is_spilled_scalar_reg(&func->stack[i])) { + bt_clear_slot(bt, i); continue; } reg = &func->stack[i].spilled_ptr; - if (reg->type != SCALAR_VALUE) { - stack_mask &= ~(1ull << i); - continue; - } - if (!reg->precise) - new_marks = true; - reg->precise = true; + if (reg->precise) + bt_clear_slot(bt, i); + else + reg->precise = true; } if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "parent %s regs=%x stack=%llx marks:", - new_marks ? "didn't have" : "already had", - reg_mask, stack_mask); + !bt_empty(bt) ? "didn't have" : "already had", + bt_reg_mask(bt), bt_stack_mask(bt)); print_verifier_state(env, func, true); } - if (!reg_mask && !stack_mask) - break; - if (!new_marks) + if (bt_empty(bt)) break; last_idx = st->last_insn_idx; @@ -18872,6 +18985,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (!env) return -ENOMEM; + env->bt.env = env; + len = (*prog)->len; env->insn_aux_data = vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); -- cgit v1.2.3 From d9439c21a9e4769bfd83a03ab39056164d44ac31 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 May 2023 21:33:11 -0700 Subject: bpf: improve precision backtrack logging Add helper to format register and stack masks in more human-readable format. Adjust logging a bit during backtrack propagation and especially during forcing precision fallback logic to make it clearer what's going on (with log_level=2, of course), and also start reporting affected frame depth. This is in preparation for having more than one active frame later when precision propagation between subprog calls is added. Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230505043317.3629845-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 13 ++- kernel/bpf/verifier.c | 72 +++++++++++++++-- tools/testing/selftests/bpf/verifier/precise.c | 106 +++++++++++++------------ 3 files changed, 128 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 33f541366f4e..5b11a3b0fec0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -18,8 +18,11 @@ * that converting umax_value to int cannot overflow. */ #define BPF_MAX_VAR_SIZ (1 << 29) -/* size of type_str_buf in bpf_verifier. */ -#define TYPE_STR_BUF_LEN 128 +/* size of tmp_str_buf in bpf_verifier. + * we need at least 306 bytes to fit full stack mask representation + * (in the "-8,-16,...,-512" form) + */ +#define TMP_STR_BUF_LEN 320 /* Liveness marks, used for registers and spilled-regs (in stack slots). 
* Read marks propagate upwards until they find a write mark; they record that @@ -620,8 +623,10 @@ struct bpf_verifier_env { /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; u64 prev_log_pos, prev_insn_print_pos; - /* buffer used in reg_type_str() to generate reg_type string */ - char type_str_buf[TYPE_STR_BUF_LEN]; + /* buffer used to generate temporary string representations, + * e.g., in reg_type_str() to generate reg_type string + */ + char tmp_str_buf[TMP_STR_BUF_LEN]; }; __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9b2e571250e1..5412c8c8511d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -605,9 +605,9 @@ static const char *reg_type_str(struct bpf_verifier_env *env, type & PTR_TRUSTED ? "trusted_" : "" ); - snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", + snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); - return env->type_str_buf; + return env->tmp_str_buf; } static char slot_type_char[] = { @@ -3308,6 +3308,45 @@ static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot) return bt->stack_masks[bt->frame] & (1ull << slot); } +/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */ +static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask) +{ + DECLARE_BITMAP(mask, 64); + bool first = true; + int i, n; + + buf[0] = '\0'; + + bitmap_from_u64(mask, reg_mask); + for_each_set_bit(i, mask, 32) { + n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i); + first = false; + buf += n; + buf_sz -= n; + if (buf_sz < 0) + break; + } +} +/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */ +static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) +{ + DECLARE_BITMAP(mask, 64); + bool first = true; + int i, n; + + buf[0] = '\0'; + + bitmap_from_u64(mask, stack_mask); + for_each_set_bit(i, mask, 64) { + n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8); + first = false; + buf += n; + buf_sz -= n; + if (buf_sz < 0) + break; + } +} + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. @@ -3331,7 +3370,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (insn->code == 0) return 0; if (env->log.level & BPF_LOG_LEVEL2) { - verbose(env, "regs=%x stack=%llx before ", bt_reg_mask(bt), bt_stack_mask(bt)); + fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt)); + verbose(env, "mark_precise: frame%d: regs=%s ", + bt->frame, env->tmp_str_buf); + fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt)); + verbose(env, "stack=%s before ", env->tmp_str_buf); verbose(env, "%d: ", idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); } @@ -3531,6 +3574,11 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, struct bpf_reg_state *reg; int i, j; + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n", + st->curframe); + } + /* big hammer: mark all scalars precise in this path. * pop_stack may still get !precise scalars. 
* We also skip current state and go straight to first parent state, @@ -3542,17 +3590,25 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, func = st->frame[i]; for (j = 0; j < BPF_REG_FP; j++) { reg = &func->regs[j]; - if (reg->type != SCALAR_VALUE) + if (reg->type != SCALAR_VALUE || reg->precise) continue; reg->precise = true; + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "force_precise: frame%d: forcing r%d to be precise\n", + i, j); + } } for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { if (!is_spilled_reg(&func->stack[j])) continue; reg = &func->stack[j].spilled_ptr; - if (reg->type != SCALAR_VALUE) + if (reg->type != SCALAR_VALUE || reg->precise) continue; reg->precise = true; + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n", + i, -(j + 1) * 8); + } } } } @@ -3716,8 +3772,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d\n", + bt->frame, last_idx, first_idx); + } if (last_idx < 0) { /* we are at the entry into subprog, which diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 8f0340eed696..a22fabd404ed 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -38,25 +38,24 @@ .fixup_map_array_48b = { 1 }, .result = VERBOSE_ACCEPT, .errstr = - "26: (85) call bpf_probe_read_kernel#113\ - last_idx 26 first_idx 20\ - regs=4 stack=0 before 25\ - regs=4 stack=0 before 24\ - regs=4 stack=0 before 23\ - regs=4 stack=0 before 22\ - regs=4 stack=0 before 20\ - parent didn't have regs=4 stack=0 marks\ - last_idx 19 first_idx 10\ - regs=4 stack=0 before 19\ - regs=200 stack=0 before 18\ - regs=300 stack=0 before 17\ - regs=201 stack=0 before 15\ - regs=201 stack=0 before 14\ - regs=200 stack=0 before 13\ - regs=200 stack=0 before 12\ - regs=200 stack=0 before 11\ - regs=200 stack=0 before 10\ - parent already had regs=0 stack=0 marks", + "mark_precise: frame0: last_idx 26 first_idx 20\ + mark_precise: frame0: regs=r2 stack= before 25\ + mark_precise: frame0: regs=r2 stack= before 24\ + mark_precise: frame0: regs=r2 stack= before 23\ + mark_precise: frame0: regs=r2 stack= before 22\ + mark_precise: frame0: regs=r2 stack= before 20\ + parent didn't have regs=4 stack=0 marks:\ + mark_precise: frame0: last_idx 19 first_idx 10\ + mark_precise: frame0: regs=r2 stack= before 19\ + mark_precise: frame0: regs=r9 stack= before 18\ + mark_precise: frame0: regs=r8,r9 stack= before 17\ + mark_precise: frame0: regs=r0,r9 stack= before 15\ + mark_precise: frame0: regs=r0,r9 stack= before 14\ + mark_precise: frame0: regs=r9 stack= before 13\ + mark_precise: frame0: regs=r9 stack= before 12\ + mark_precise: frame0: regs=r9 stack= before 11\ + mark_precise: frame0: regs=r9 stack= before 10\ + parent already had regs=0 stack=0 marks:", }, { "precise: test 2", @@ -100,20 +99,20 @@ .flags = BPF_F_TEST_STATE_FREQ, .errstr = "26: (85) call bpf_probe_read_kernel#113\ - last_idx 26 first_idx 22\ - regs=4 stack=0 before 25\ - regs=4 stack=0 before 24\ - regs=4 stack=0 before 23\ - regs=4 stack=0 before 22\ - parent didn't have regs=4 stack=0 marks\ - last_idx 20 first_idx 20\ - regs=4 stack=0 
before 20\ - parent didn't have regs=4 stack=0 marks\ - last_idx 19 first_idx 17\ - regs=4 stack=0 before 19\ - regs=200 stack=0 before 18\ - regs=300 stack=0 before 17\ - parent already had regs=0 stack=0 marks", + mark_precise: frame0: last_idx 26 first_idx 22\ + mark_precise: frame0: regs=r2 stack= before 25\ + mark_precise: frame0: regs=r2 stack= before 24\ + mark_precise: frame0: regs=r2 stack= before 23\ + mark_precise: frame0: regs=r2 stack= before 22\ + parent didn't have regs=4 stack=0 marks:\ + mark_precise: frame0: last_idx 20 first_idx 20\ + mark_precise: frame0: regs=r2 stack= before 20\ + parent didn't have regs=4 stack=0 marks:\ + mark_precise: frame0: last_idx 19 first_idx 17\ + mark_precise: frame0: regs=r2 stack= before 19\ + mark_precise: frame0: regs=r9 stack= before 18\ + mark_precise: frame0: regs=r8,r9 stack= before 17\ + parent already had regs=0 stack=0 marks:", }, { "precise: cross frame pruning", @@ -153,15 +152,15 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "5: (2d) if r4 > r0 goto pc+0\ - last_idx 5 first_idx 5\ - parent didn't have regs=10 stack=0 marks\ - last_idx 4 first_idx 2\ - regs=10 stack=0 before 4\ - regs=10 stack=0 before 3\ - regs=0 stack=1 before 2\ - last_idx 5 first_idx 5\ - parent didn't have regs=1 stack=0 marks", + .errstr = "mark_precise: frame0: last_idx 5 first_idx 5\ + parent didn't have regs=10 stack=0 marks:\ + mark_precise: frame0: last_idx 4 first_idx 2\ + mark_precise: frame0: regs=r4 stack= before 4\ + mark_precise: frame0: regs=r4 stack= before 3\ + mark_precise: frame0: regs= stack=-8 before 2\ + mark_precise: frame0: falling back to forcing all scalars precise\ + mark_precise: frame0: last_idx 5 first_idx 5\ + parent didn't have regs=1 stack=0 marks:", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -179,16 +178,19 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "last_idx 6 first_idx 6\ - parent didn't have regs=10 stack=0 marks\ - last_idx 5 first_idx 3\ - regs=10 stack=0 before 5\ - regs=10 stack=0 before 4\ - regs=0 stack=1 before 3\ - last_idx 6 first_idx 6\ - parent didn't have regs=1 stack=0 marks\ - last_idx 5 first_idx 3\ - regs=1 stack=0 before 5", + .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ + parent didn't have regs=10 stack=0 marks:\ + mark_precise: frame0: last_idx 5 first_idx 3\ + mark_precise: frame0: regs=r4 stack= before 5\ + mark_precise: frame0: regs=r4 stack= before 4\ + mark_precise: frame0: regs= stack=-8 before 3\ + mark_precise: frame0: falling back to forcing all scalars precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + mark_precise: frame0: last_idx 6 first_idx 6\ + parent didn't have regs=1 stack=0 marks:\ + mark_precise: frame0: last_idx 5 first_idx 3\ + mark_precise: frame0: regs=r0 stack= before 5", .result = VERBOSE_ACCEPT, .retval = -1, }, -- cgit v1.2.3 From 3bda08b63670c39be390fcb00e7718775508e673 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 5 May 2023 18:31:30 -0700 Subject: bpf: Allow NULL buffers in bpf_dynptr_slice(_rw) bpf_dynptr_slice(_rw) uses a user provided buffer if it can not provide a pointer to a block of contiguous memory. This buffer is unused in the case of local dynptrs, and may be unused in other cases as well. There is no need to require the buffer, as the kfunc can just return NULL if it was needed and not provided. 
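As an illustration of what this enables on the program side, a caller that has no scratch buffer can simply pass NULL and handle a NULL return; a minimal sketch (the xdp-backed dynptr and the requested size are hypothetical, not taken from this series):

	/* No scratch buffer supplied: for skb/xdp dynptrs the kfunc now
	 * returns NULL whenever the slice would have needed that buffer.
	 */
	eth = bpf_dynptr_slice(&ptr, 0, NULL, sizeof(struct ethhdr));
	if (!eth)
		return XDP_DROP;	/* data not contiguous and no buffer given */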
This adds another kfunc annotation, __opt, which combines with __sz and __szk to allow the buffer associated with the size to be NULL. If the buffer is NULL, the verifier does not check that the buffer is of sufficient size. Signed-off-by: Daniel Rosenberg Link: https://lore.kernel.org/r/20230506013134.2492210-2-drosen@google.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 23 ++++++++++++++++++++++- include/linux/skbuff.h | 2 +- kernel/bpf/helpers.c | 30 ++++++++++++++++++------------ kernel/bpf/verifier.c | 17 +++++++++++++---- 4 files changed, 54 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index ea2516374d92..7a3d9de5f315 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -100,7 +100,7 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a size parameter, and the value of the constant matters for program safety, __k suffix should be used. -2.2.2 __uninit Annotation +2.2.3 __uninit Annotation ------------------------- This annotation is used to indicate that the argument will be treated as @@ -117,6 +117,27 @@ Here, the dynptr will be treated as an uninitialized dynptr. Without this annotation, the verifier will reject the program if the dynptr passed in is not initialized. +2.2.4 __opt Annotation +------------------------- + +This annotation is used to indicate that the buffer associated with an __sz or __szk +argument may be null. If the function is passed a nullptr in place of the buffer, +the verifier will not check that length is appropriate for the buffer. The kfunc is +responsible for checking if this buffer is null before using it. + +An example is given below:: + + __bpf_kfunc void *bpf_dynptr_slice(..., void *buffer__opt, u32 buffer__szk) + { + ... + } + +Here, the buffer may be null. If buffer is not null, it at least of size buffer_szk. +Either way, the returned buffer is either NULL, or of size buffer_szk. Without this +annotation, the verifier will reject the program if a null pointer is passed in with +a nonzero size. + + .. _BPF_kfunc_nodef: 2.3 Using an existing kernel function diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 738776ab8838..8ddb4af1a501 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4033,7 +4033,7 @@ __skb_header_pointer(const struct sk_buff *skb, int offset, int len, if (likely(hlen - offset >= len)) return (void *)data + offset; - if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) + if (!skb || !buffer || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a128fe0ab2d0..4ef4c4f8a355 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2190,13 +2190,15 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data. * @ptr: The dynptr whose data slice to retrieve * @offset: Offset into the dynptr - * @buffer: User-provided buffer to copy contents into - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the - * requested slice. This must be a constant. + * @buffer__opt: User-provided buffer to copy contents into. May be NULL + * @buffer__szk: Size (in bytes) of the buffer if present. This is the + * length of the requested slice. This must be a constant. 
* * For non-skb and non-xdp type dynptrs, there is no difference between * bpf_dynptr_slice and bpf_dynptr_data. * + * If buffer__opt is NULL, the call will fail if buffer_opt was needed. + * * If the intention is to write to the data slice, please use * bpf_dynptr_slice_rdwr. * @@ -2213,7 +2215,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * direct pointer) */ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, - void *buffer, u32 buffer__szk) + void *buffer__opt, u32 buffer__szk) { enum bpf_dynptr_type type; u32 len = buffer__szk; @@ -2233,15 +2235,17 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset case BPF_DYNPTR_TYPE_RINGBUF: return ptr->data + ptr->offset + offset; case BPF_DYNPTR_TYPE_SKB: - return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer); + return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt); case BPF_DYNPTR_TYPE_XDP: { void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len); if (xdp_ptr) return xdp_ptr; - bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false); - return buffer; + if (!buffer__opt) + return NULL; + bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer__opt, len, false); + return buffer__opt; } default: WARN_ONCE(true, "unknown dynptr type %d\n", type); @@ -2253,13 +2257,15 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data. * @ptr: The dynptr whose data slice to retrieve * @offset: Offset into the dynptr - * @buffer: User-provided buffer to copy contents into - * @buffer__szk: Size (in bytes) of the buffer. This is the length of the - * requested slice. This must be a constant. + * @buffer__opt: User-provided buffer to copy contents into. May be NULL + * @buffer__szk: Size (in bytes) of the buffer if present. This is the + * length of the requested slice. This must be a constant. * * For non-skb and non-xdp type dynptrs, there is no difference between * bpf_dynptr_slice and bpf_dynptr_data. * + * If buffer__opt is NULL, the call will fail if buffer_opt was needed. + * * The returned pointer is writable and may point to either directly the dynptr * data at the requested offset or to the buffer if unable to obtain a direct * data pointer to (example: the requested slice is to the paged area of an skb @@ -2290,7 +2296,7 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * direct pointer) */ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, - void *buffer, u32 buffer__szk) + void *buffer__opt, u32 buffer__szk) { if (!ptr->data || __bpf_dynptr_is_rdonly(ptr)) return NULL; @@ -2317,7 +2323,7 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o * will be copied out into the buffer and the user will need to call * bpf_dynptr_write() to commit changes. 
*/ - return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk); + return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); } __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0fa96581eb77..7e6bbae9db81 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9743,6 +9743,11 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf, return __kfunc_param_match_suffix(btf, arg, "__szk"); } +static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__opt"); +} + static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) { return __kfunc_param_match_suffix(btf, arg, "__k"); @@ -10830,13 +10835,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_MEM_SIZE: { + struct bpf_reg_state *buff_reg = ®s[regno]; + const struct btf_param *buff_arg = &args[i]; struct bpf_reg_state *size_reg = ®s[regno + 1]; const struct btf_param *size_arg = &args[i + 1]; - ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1); - if (ret < 0) { - verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); - return ret; + if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) { + ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1); + if (ret < 0) { + verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); + return ret; + } } if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) { -- cgit v1.2.3 From 69da5aa99ea67e86d3461fb281eadc952cc2914f Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 7 Apr 2023 07:47:33 -0400 Subject: regmap-irq: Drop map from handle_mask_sync() parameters Remove the map parameter from the struct regmap_irq_chip callback handle_mask_sync() because it can be passed via the irq_drv_data parameter instead. The gpio-104-dio-48e driver is the only consumer of this callback and is thus updated accordingly. 
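To illustrate the consumer side, the regmap is expected to be reachable from the state already handed in as irq_drv_data; a hedged sketch with a hypothetical driver (the foo_* names and the ->map field are assumptions, not taken from this patch):

	static int foo_handle_mask_sync(const int index,
					const unsigned int mask_buf_def,
					const unsigned int mask_buf,
					void *const irq_drv_data)
	{
		struct foo_gpio *const state = irq_drv_data;	/* set at probe time */

		/* The regmap formerly passed as 'map' now comes from driver state. */
		return regmap_write(state->map, FOO_IRQ_MASK_REG(index), mask_buf);
	}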
Reviewed-by: Linus Walleij chip->num_regs; i++) { if (d->mask_base) { if (d->chip->handle_mask_sync) - d->chip->handle_mask_sync(d->map, i, - d->mask_buf_def[i], + d->chip->handle_mask_sync(i, d->mask_buf_def[i], d->mask_buf[i], d->chip->irq_drv_data); else { @@ -920,7 +919,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (d->mask_base) { if (chip->handle_mask_sync) { - ret = chip->handle_mask_sync(d->map, i, + ret = chip->handle_mask_sync(i, d->mask_buf_def[i], d->mask_buf[i], chip->irq_drv_data); diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c index 63dce9532e97..8ff5f4ff5958 100644 --- a/drivers/gpio/gpio-104-dio-48e.c +++ b/drivers/gpio/gpio-104-dio-48e.c @@ -110,7 +110,7 @@ struct dio48e_gpio { unsigned int irq_mask; }; -static int dio48e_handle_mask_sync(struct regmap *const map, const int index, +static int dio48e_handle_mask_sync(const int index, const unsigned int mask_buf_def, const unsigned int mask_buf, void *const irq_drv_data) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c2b9cc5db824..f820bd44d16f 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1658,8 +1658,7 @@ struct regmap_irq_chip { int (*handle_pre_irq)(void *irq_drv_data); int (*handle_post_irq)(void *irq_drv_data); - int (*handle_mask_sync)(struct regmap *map, int index, - unsigned int mask_buf_def, + int (*handle_mask_sync)(int index, unsigned int mask_buf_def, unsigned int mask_buf, void *irq_drv_data); int (*set_type_virt)(unsigned int **buf, unsigned int type, unsigned long hwirq, int reg); -- cgit v1.2.3 From d1a7718ee8dbcc488d3243d52e19c755123e0024 Mon Sep 17 00:00:00 2001 From: Jaewon Kim Date: Tue, 2 May 2023 15:28:11 +0900 Subject: spi: s3c64xx: change polling mode to optional Previously, Polling mode was supported as quirk for SOC without DMA. To provide more flexible support for polling mode, it changed to polling mode when the 'dmas' property is not present in the devicetree, rather than using a quirk. Signed-off-by: Jaewon Kim #define MAX_SPI_PORTS 12 -#define S3C64XX_SPI_QUIRK_POLL (1 << 0) #define S3C64XX_SPI_QUIRK_CS_AUTO (1 << 1) #define AUTOSUSPEND_TIMEOUT 2000 @@ -116,7 +115,7 @@ #define S3C64XX_SPI_TRAILCNT S3C64XX_SPI_MAX_TRAILCNT #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t) -#define is_polling(x) (x->port_conf->quirks & S3C64XX_SPI_QUIRK_POLL) +#define is_polling(x) (x->cntrlr_info->polling) #define RXBUSY (1<<2) #define TXBUSY (1<<3) @@ -1068,6 +1067,7 @@ static struct s3c64xx_spi_info *s3c64xx_spi_parse_dt(struct device *dev) } sci->no_cs = of_property_read_bool(dev->of_node, "no-cs-readback"); + sci->polling = !of_property_present(dev->of_node, "dmas"); return sci; } diff --git a/include/linux/platform_data/spi-s3c64xx.h b/include/linux/platform_data/spi-s3c64xx.h index 3101152ce449..1d6e6c424fc6 100644 --- a/include/linux/platform_data/spi-s3c64xx.h +++ b/include/linux/platform_data/spi-s3c64xx.h @@ -36,6 +36,7 @@ struct s3c64xx_spi_info { int src_clk_nr; int num_cs; bool no_cs; + bool polling; int (*cfg_gpio)(void); }; -- cgit v1.2.3 From 0d6d062ca27ec7ef547712d34dcfcfb952bcef53 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 4 May 2023 16:30:00 +0530 Subject: perf/core: Rework forwarding of {task|cpu}-clock events Currently, PERF_TYPE_SOFTWARE is treated specially since task-clock and cpu-clock events are interfaced through it but internally gets forwarded to their own pmus. 
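The rework below leans on the generic event-init path retrying the pmu lookup after event_init() rewrites the type; a condensed sketch of that retry in perf_init_event() (simplified, not the exact upstream code):

	type = event->attr.type;
again:
	pmu = idr_find(&pmu_idr, type);
	if (pmu) {
		ret = perf_try_init_event(pmu, event);
		/* event_init() may rewrite attr.type and return -ENOENT:
		 * follow the new type and retry, which is the forwarding.
		 */
		if (ret == -ENOENT && event->attr.type != type) {
			type = event->attr.type;
			goto again;
		}
	}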
Rework this by overwriting event->attr.type in perf_swevent_init() which will cause perf_init_event() to retry with updated type and event will automatically get forwarded to right pmu. With the change, SW pmu no longer needs to be treated specially and can be included in 'pmu_idr' list. Suggested-by: Peter Zijlstra Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230504110003.2548-2-ravi.bangoria@amd.com --- include/linux/perf_event.h | 10 ++++++ kernel/events/core.c | 77 ++++++++++++++++++++++++---------------------- 2 files changed, 51 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d5628a7b5eaa..bf4f346d6d70 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -295,6 +295,8 @@ struct perf_event_pmu_context; struct perf_output_handle; +#define PMU_NULL_DEV ((void *)(~0UL)) + /** * struct pmu - generic performance monitoring unit */ @@ -827,6 +829,14 @@ struct perf_event { void *security; #endif struct list_head sb_list; + + /* + * Certain events gets forwarded to another pmu internally by over- + * writing kernel copy of event->attr.type without user being aware + * of it. event->orig_type contains original 'type' requested by + * user. + */ + __u32 orig_type; #endif /* CONFIG_PERF_EVENTS */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 68baa8194d9f..c01bbe93e291 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6647,7 +6647,7 @@ static void perf_sigtrap(struct perf_event *event) return; send_sig_perf((void __user *)event->pending_addr, - event->attr.type, event->attr.sig_data); + event->orig_type, event->attr.sig_data); } /* @@ -9951,6 +9951,9 @@ static void sw_perf_event_destroy(struct perf_event *event) swevent_hlist_put(); } +static struct pmu perf_cpu_clock; /* fwd declaration */ +static struct pmu perf_task_clock; + static int perf_swevent_init(struct perf_event *event) { u64 event_id = event->attr.config; @@ -9966,7 +9969,10 @@ static int perf_swevent_init(struct perf_event *event) switch (event_id) { case PERF_COUNT_SW_CPU_CLOCK: + event->attr.type = perf_cpu_clock.type; + return -ENOENT; case PERF_COUNT_SW_TASK_CLOCK: + event->attr.type = perf_task_clock.type; return -ENOENT; default: @@ -11086,7 +11092,7 @@ static void cpu_clock_event_read(struct perf_event *event) static int cpu_clock_event_init(struct perf_event *event) { - if (event->attr.type != PERF_TYPE_SOFTWARE) + if (event->attr.type != perf_cpu_clock.type) return -ENOENT; if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) @@ -11107,6 +11113,7 @@ static struct pmu perf_cpu_clock = { .task_ctx_nr = perf_sw_context, .capabilities = PERF_PMU_CAP_NO_NMI, + .dev = PMU_NULL_DEV, .event_init = cpu_clock_event_init, .add = cpu_clock_event_add, @@ -11167,7 +11174,7 @@ static void task_clock_event_read(struct perf_event *event) static int task_clock_event_init(struct perf_event *event) { - if (event->attr.type != PERF_TYPE_SOFTWARE) + if (event->attr.type != perf_task_clock.type) return -ENOENT; if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) @@ -11188,6 +11195,7 @@ static struct pmu perf_task_clock = { .task_ctx_nr = perf_sw_context, .capabilities = PERF_PMU_CAP_NO_NMI, + .dev = PMU_NULL_DEV, .event_init = task_clock_event_init, .add = task_clock_event_add, @@ -11415,31 +11423,31 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) goto unlock; pmu->type = -1; - if (!name) - goto skip_type; + if (WARN_ONCE(!name, 
"Can not register anonymous pmu.\n")) { + ret = -EINVAL; + goto free_pdc; + } + pmu->name = name; - if (type != PERF_TYPE_SOFTWARE) { - if (type >= 0) - max = type; + if (type >= 0) + max = type; - ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); - if (ret < 0) - goto free_pdc; + ret = idr_alloc(&pmu_idr, pmu, max, 0, GFP_KERNEL); + if (ret < 0) + goto free_pdc; - WARN_ON(type >= 0 && ret != type); + WARN_ON(type >= 0 && ret != type); - type = ret; - } + type = ret; pmu->type = type; - if (pmu_bus_running) { + if (pmu_bus_running && !pmu->dev) { ret = pmu_dev_alloc(pmu); if (ret) goto free_idr; } -skip_type: ret = -ENOMEM; pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context); if (!pmu->cpu_pmu_context) @@ -11481,16 +11489,7 @@ skip_type: if (!pmu->event_idx) pmu->event_idx = perf_event_idx_default; - /* - * Ensure the TYPE_SOFTWARE PMUs are at the head of the list, - * since these cannot be in the IDR. This way the linear search - * is fast, provided a valid software event is provided. - */ - if (type == PERF_TYPE_SOFTWARE || !name) - list_add_rcu(&pmu->entry, &pmus); - else - list_add_tail_rcu(&pmu->entry, &pmus); - + list_add_rcu(&pmu->entry, &pmus); atomic_set(&pmu->exclusive_cnt, 0); ret = 0; unlock: @@ -11499,12 +11498,13 @@ unlock: return ret; free_dev: - device_del(pmu->dev); - put_device(pmu->dev); + if (pmu->dev && pmu->dev != PMU_NULL_DEV) { + device_del(pmu->dev); + put_device(pmu->dev); + } free_idr: - if (pmu->type != PERF_TYPE_SOFTWARE) - idr_remove(&pmu_idr, pmu->type); + idr_remove(&pmu_idr, pmu->type); free_pdc: free_percpu(pmu->pmu_disable_count); @@ -11525,9 +11525,8 @@ void perf_pmu_unregister(struct pmu *pmu) synchronize_rcu(); free_percpu(pmu->pmu_disable_count); - if (pmu->type != PERF_TYPE_SOFTWARE) - idr_remove(&pmu_idr, pmu->type); - if (pmu_bus_running) { + idr_remove(&pmu_idr, pmu->type); + if (pmu_bus_running && pmu->dev && pmu->dev != PMU_NULL_DEV) { if (pmu->nr_addr_filters) device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); device_del(pmu->dev); @@ -11601,6 +11600,12 @@ static struct pmu *perf_init_event(struct perf_event *event) idx = srcu_read_lock(&pmus_srcu); + /* + * Save original type before calling pmu->event_init() since certain + * pmus overwrites event->attr.type to forward event to another pmu. + */ + event->orig_type = event->attr.type; + /* Try parent's PMU first: */ if (event->parent && event->parent->pmu) { pmu = event->parent->pmu; @@ -13640,8 +13645,8 @@ void __init perf_event_init(void) perf_event_init_all_cpus(); init_srcu_struct(&pmus_srcu); perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE); - perf_pmu_register(&perf_cpu_clock, NULL, -1); - perf_pmu_register(&perf_task_clock, NULL, -1); + perf_pmu_register(&perf_cpu_clock, "cpu_clock", -1); + perf_pmu_register(&perf_task_clock, "task_clock", -1); perf_tp_register(); perf_event_init_cpu(smp_processor_id()); register_reboot_notifier(&perf_reboot_notifier); @@ -13684,7 +13689,7 @@ static int __init perf_event_sysfs_init(void) goto unlock; list_for_each_entry(pmu, &pmus, entry) { - if (!pmu->name || pmu->type < 0) + if (pmu->dev) continue; ret = pmu_dev_alloc(pmu); -- cgit v1.2.3 From ca528cc501896a808dc79c3c0544369d23b331c8 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Thu, 6 Apr 2023 13:31:45 -0700 Subject: sched/topology: Remove SHARED_CHILD from ASYM_PACKING Only x86 and Power7 use ASYM_PACKING. They use it differently. Power7 has cores of equal priority, but the SMT siblings of a core have different priorities. 
Parent scheduling domains do not need (nor have) the ASYM_PACKING flag. SHARED_CHILD is not needed. Using SHARED_PARENT would cause the topology debug code to complain. X86 has cores of different priority, but all the SMT siblings of the core have equal priority. It needs ASYM_PACKING at the MC level, but not at the SMT level (it also needs it at upper levels if they have scheduling groups of different priority). Removing ASYM_PACKING from the SMT domain causes the topology debug code to complain. Remove SHARED_CHILD for now. We still need a topology check that satisfies both architectures. Suggested-by: Valentin Schneider Signed-off-by: Ricardo Neri Signed-off-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Link: https://lore.kernel.org/r/20230406203148.19182-10-ricardo.neri-calderon@linux.intel.com --- include/linux/sched/sd_flags.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h index 57bde66d95f7..fad77b5172e2 100644 --- a/include/linux/sched/sd_flags.h +++ b/include/linux/sched/sd_flags.h @@ -132,12 +132,9 @@ SD_FLAG(SD_SERIALIZE, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS) /* * Place busy tasks earlier in the domain * - * SHARED_CHILD: Usually set on the SMT level. Technically could be set further - * up, but currently assumed to be set from the base domain - * upwards (see update_top_cache_domain()). * NEEDS_GROUPS: Load balancing flag. */ -SD_FLAG(SD_ASYM_PACKING, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS) +SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS) /* * Prefer to place tasks in a sibling domain -- cgit v1.2.3 From 6121cd9ef911432b14c2a17aefaf8cd2f3cfcdff Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 28 Apr 2023 14:24:51 +0200 Subject: fbdev: Move I/O read and write code into helper functions Move the existing I/O read and write code for I/O memory into the new helpers fb_cfb_read() and fb_cfb_write(). Make them the default fp_ops. No functional changes. In the near term, the new functions will be useful to the DRM subsystem, which currently provides it's own implementation. It can then use the shared code. In the longer term, it might make sense to revise the I/O helper's default status and make them opt-in by the driver. Systems that don't use them would not contain the code any longer. 
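For a sense of how a driver opts in, the helpers slot straight into fb_ops for an I/O-memory framebuffer; a hedged sketch with a hypothetical driver (not part of this patch):

	static const struct fb_ops foo_fb_ops = {
		.owner        = THIS_MODULE,
		.fb_read      = fb_io_read,	/* shared I/O-memory read helper */
		.fb_write     = fb_io_write,	/* shared I/O-memory write helper */
		.fb_fillrect  = cfb_fillrect,
		.fb_copyarea  = cfb_copyarea,
		.fb_imageblit = cfb_imageblit,
	};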
v2: * add detailed commit message (Javier) * rename fb_cfb_() to fb_io_() (Geert) * add fixes that got lost while moving the code (Geert) Signed-off-by: Thomas Zimmermann Tested-by: Sui Jingfeng Reviewed-by: Javier Martinez Canillas Acked-by: Helge Deller Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20230428122452.4856-19-tzimmermann@suse.de --- drivers/video/fbdev/core/Makefile | 2 +- drivers/video/fbdev/core/fb_io_fops.c | 133 ++++++++++++++++++++++++++++++++++ drivers/video/fbdev/core/fbmem.c | 118 +----------------------------- include/linux/fb.h | 10 +++ 4 files changed, 146 insertions(+), 117 deletions(-) create mode 100644 drivers/video/fbdev/core/fb_io_fops.c (limited to 'include/linux') diff --git a/drivers/video/fbdev/core/Makefile b/drivers/video/fbdev/core/Makefile index 08fabce76b74..8f0060160ffb 100644 --- a/drivers/video/fbdev/core/Makefile +++ b/drivers/video/fbdev/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_FB_NOTIFY) += fb_notify.o obj-$(CONFIG_FB) += fb.o fb-y := fbmem.o fbmon.o fbcmap.o fbsysfs.o \ - modedb.o fbcvt.o fb_cmdline.o + modedb.o fbcvt.o fb_cmdline.o fb_io_fops.o fb-$(CONFIG_FB_DEFERRED_IO) += fb_defio.o ifeq ($(CONFIG_FRAMEBUFFER_CONSOLE),y) diff --git a/drivers/video/fbdev/core/fb_io_fops.c b/drivers/video/fbdev/core/fb_io_fops.c new file mode 100644 index 000000000000..f5299d50f33b --- /dev/null +++ b/drivers/video/fbdev/core/fb_io_fops.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +ssize_t fb_io_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + u8 *buffer, *dst; + u8 __iomem *src; + int c, cnt = 0, err = 0; + unsigned long total_size, trailing; + + if (!info->screen_base) + return -ENODEV; + + total_size = info->screen_size; + + if (total_size == 0) + total_size = info->fix.smem_len; + + if (p >= total_size) + return 0; + + if (count >= total_size) + count = total_size; + + if (count + p > total_size) + count = total_size - p; + + buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, + GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + src = (u8 __iomem *) (info->screen_base + p); + + if (info->fbops->fb_sync) + info->fbops->fb_sync(info); + + while (count) { + c = (count > PAGE_SIZE) ? PAGE_SIZE : count; + dst = buffer; + fb_memcpy_fromfb(dst, src, c); + dst += c; + src += c; + + trailing = copy_to_user(buf, buffer, c); + if (trailing == c) { + err = -EFAULT; + break; + } + c -= trailing; + + *ppos += c; + buf += c; + cnt += c; + count -= c; + } + + kfree(buffer); + + return cnt ? cnt : err; +} +EXPORT_SYMBOL(fb_io_read); + +ssize_t fb_io_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos) +{ + unsigned long p = *ppos; + u8 *buffer, *src; + u8 __iomem *dst; + int c, cnt = 0, err = 0; + unsigned long total_size, trailing; + + if (!info->screen_base) + return -ENODEV; + + total_size = info->screen_size; + + if (total_size == 0) + total_size = info->fix.smem_len; + + if (p > total_size) + return -EFBIG; + + if (count > total_size) { + err = -EFBIG; + count = total_size; + } + + if (count + p > total_size) { + if (!err) + err = -ENOSPC; + + count = total_size - p; + } + + buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, + GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + dst = (u8 __iomem *) (info->screen_base + p); + + if (info->fbops->fb_sync) + info->fbops->fb_sync(info); + + while (count) { + c = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; + src = buffer; + + trailing = copy_from_user(src, buf, c); + if (trailing == c) { + err = -EFAULT; + break; + } + c -= trailing; + + fb_memcpy_tofb(dst, src, c); + dst += c; + src += c; + *ppos += c; + buf += c; + cnt += c; + count -= c; + } + + kfree(buffer); + + return (cnt) ? cnt : err; +} +EXPORT_SYMBOL(fb_io_write); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index bf5a0780457e..63af40831d7d 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -761,12 +761,7 @@ static struct fb_info *file_fb_info(struct file *file) static ssize_t fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - unsigned long p = *ppos; struct fb_info *info = file_fb_info(file); - u8 *buffer, *dst; - u8 __iomem *src; - int c, cnt = 0, err = 0; - unsigned long total_size, trailing; if (!info) return -ENODEV; @@ -777,67 +772,13 @@ fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (info->fbops->fb_read) return info->fbops->fb_read(info, buf, count, ppos); - if (!info->screen_base) - return -ENODEV; - - total_size = info->screen_size; - - if (total_size == 0) - total_size = info->fix.smem_len; - - if (p >= total_size) - return 0; - - if (count >= total_size) - count = total_size; - - if (count + p > total_size) - count = total_size - p; - - buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, - GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - src = (u8 __iomem *) (info->screen_base + p); - - if (info->fbops->fb_sync) - info->fbops->fb_sync(info); - - while (count) { - c = (count > PAGE_SIZE) ? PAGE_SIZE : count; - dst = buffer; - fb_memcpy_fromfb(dst, src, c); - dst += c; - src += c; - - trailing = copy_to_user(buf, buffer, c); - if (trailing == c) { - err = -EFAULT; - break; - } - c -= trailing; - - *ppos += c; - buf += c; - cnt += c; - count -= c; - } - - kfree(buffer); - - return cnt ? cnt : err; + return fb_io_read(info, buf, count, ppos); } static ssize_t fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - unsigned long p = *ppos; struct fb_info *info = file_fb_info(file); - u8 *buffer, *src; - u8 __iomem *dst; - int c, cnt = 0, err = 0; - unsigned long total_size, trailing; if (!info) return -ENODEV; @@ -848,62 +789,7 @@ fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) if (info->fbops->fb_write) return info->fbops->fb_write(info, buf, count, ppos); - if (!info->screen_base) - return -ENODEV; - - total_size = info->screen_size; - - if (total_size == 0) - total_size = info->fix.smem_len; - - if (p > total_size) - return -EFBIG; - - if (count > total_size) { - err = -EFBIG; - count = total_size; - } - - if (count + p > total_size) { - if (!err) - err = -ENOSPC; - - count = total_size - p; - } - - buffer = kmalloc((count > PAGE_SIZE) ? PAGE_SIZE : count, - GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - dst = (u8 __iomem *) (info->screen_base + p); - - if (info->fbops->fb_sync) - info->fbops->fb_sync(info); - - while (count) { - c = (count > PAGE_SIZE) ? PAGE_SIZE : count; - src = buffer; - - trailing = copy_from_user(src, buf, c); - if (trailing == c) { - err = -EFAULT; - break; - } - c -= trailing; - - fb_memcpy_tofb(dst, src, c); - dst += c; - src += c; - *ppos += c; - buf += c; - cnt += c; - count -= c; - } - - kfree(buffer); - - return (cnt) ? 
cnt : err; + return fb_io_write(info, buf, count, ppos); } int diff --git a/include/linux/fb.h b/include/linux/fb.h index 08cb47da71f8..ec978a4969a9 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -576,9 +576,19 @@ struct fb_info { extern int fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var); extern int fb_pan_display(struct fb_info *info, struct fb_var_screeninfo *var); extern int fb_blank(struct fb_info *info, int blank); + +/* + * Drawing operations where framebuffer is in I/O memory + */ + extern void cfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect); extern void cfb_copyarea(struct fb_info *info, const struct fb_copyarea *area); extern void cfb_imageblit(struct fb_info *info, const struct fb_image *image); +extern ssize_t fb_io_read(struct fb_info *info, char __user *buf, + size_t count, loff_t *ppos); +extern ssize_t fb_io_write(struct fb_info *info, const char __user *buf, + size_t count, loff_t *ppos); + /* * Drawing operations where framebuffer is in system RAM */ -- cgit v1.2.3 From c00bc80462afc7963f449d7f21d896d2f629cacc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 15 Apr 2023 20:23:34 +0200 Subject: power: supply: bq27xxx: Fix poll_interval handling and races on remove Before this patch bq27xxx_battery_teardown() was setting poll_interval = 0 to avoid bq27xxx_battery_update() requeuing the delayed_work item. There are 2 problems with this: 1. If the driver is unbound through sysfs, rather then the module being rmmod-ed, this changes poll_interval unexpectedly 2. This is racy, after it being set poll_interval could be changed before bq27xxx_battery_update() checks it through /sys/module/bq27xxx_battery/parameters/poll_interval Fix this by added a removed attribute to struct bq27xxx_device_info and using that instead of setting poll_interval to 0. There also is another poll_interval related race on remove(), writing /sys/module/bq27xxx_battery/parameters/poll_interval will requeue the delayed_work item for all devices on the bq27xxx_battery_devices list and the device being removed was only removed from that list after cancelling the delayed_work item. Fix this by moving the removal from the bq27xxx_battery_devices list to before cancelling the delayed_work item. Fixes: 8cfaaa811894 ("bq27x00_battery: Fix OOPS caused by unregistring bq27x00 driver") Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq27xxx_battery.c | 22 +++++++++------------- include/linux/power/bq27xxx_battery.h | 1 + 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 79aedf83cc8d..7411f1cf741b 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1801,7 +1801,7 @@ static void bq27xxx_battery_update_unlocked(struct bq27xxx_device_info *di) di->last_update = jiffies; - if (poll_interval > 0) + if (!di->removed && poll_interval > 0) mod_delayed_work(system_wq, &di->work, poll_interval * HZ); } @@ -2132,22 +2132,18 @@ EXPORT_SYMBOL_GPL(bq27xxx_battery_setup); void bq27xxx_battery_teardown(struct bq27xxx_device_info *di) { - /* - * power_supply_unregister call bq27xxx_battery_get_property which - * call bq27xxx_battery_poll. - * Make sure that bq27xxx_battery_poll will not call - * schedule_delayed_work again after unregister (which cause OOPS). 
- */ - poll_interval = 0; - - cancel_delayed_work_sync(&di->work); - - power_supply_unregister(di->bat); - mutex_lock(&bq27xxx_list_lock); list_del(&di->list); mutex_unlock(&bq27xxx_list_lock); + /* Set removed to avoid bq27xxx_battery_update() re-queuing the work */ + mutex_lock(&di->lock); + di->removed = true; + mutex_unlock(&di->lock); + + cancel_delayed_work_sync(&di->work); + + power_supply_unregister(di->bat); mutex_destroy(&di->lock); } EXPORT_SYMBOL_GPL(bq27xxx_battery_teardown); diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index a1aa68141d0b..e3322dad9c85 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -68,6 +68,7 @@ struct bq27xxx_device_info { struct bq27xxx_access_methods bus; struct bq27xxx_reg_cache cache; int charge_design_full; + bool removed; unsigned long last_update; struct delayed_work work; struct power_supply *bat; -- cgit v1.2.3 From 939a116142012926e25de0ea6b7e2f8d86a5f1b6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 15 Apr 2023 20:23:37 +0200 Subject: power: supply: bq27xxx: Ensure power_supply_changed() is called on current sign changes On gauges where the current register is signed, there is no charging flag in the flags register. So only checking flags will not result in power_supply_changed() getting called when e.g. a charger is plugged in and the current sign changes from negative (discharging) to positive (charging). This causes userspace's notion of the status to lag until userspace does a poll. And when a power_supply_leds.c LED trigger is used to indicate charging status with a LED, this LED will lag until the capacity percentage changes, which may take many minutes (because the LED trigger only is updated on power_supply_changed() calls). Fix this by calling bq27xxx_battery_current_and_status() on gauges with a signed current register and checking if the status has changed. Fixes: 297a533b3e62 ("bq27x00: Cache battery registers") Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- drivers/power/supply/bq27xxx_battery.c | 13 ++++++++++++- include/linux/power/bq27xxx_battery.h | 3 +++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c index 8f2995e9850a..f98b51ce19b5 100644 --- a/drivers/power/supply/bq27xxx_battery.c +++ b/drivers/power/supply/bq27xxx_battery.c @@ -1836,6 +1836,7 @@ static int bq27xxx_battery_current_and_status( static void bq27xxx_battery_update_unlocked(struct bq27xxx_device_info *di) { + union power_supply_propval status = di->last_status; struct bq27xxx_reg_cache cache = {0, }; bool has_singe_flag = di->opts & BQ27XXX_O_ZERO; @@ -1860,14 +1861,24 @@ static void bq27xxx_battery_update_unlocked(struct bq27xxx_device_info *di) if (di->regs[BQ27XXX_REG_CYCT] != INVALID_REG_ADDR) cache.cycle_count = bq27xxx_battery_read_cyct(di); + /* + * On gauges with signed current reporting the current must be + * checked to detect charging <-> discharging status changes. 
+ */ + if (!(di->opts & BQ27XXX_O_ZERO)) + bq27xxx_battery_current_and_status(di, NULL, &status, &cache); + /* We only have to read charge design full once */ if (di->charge_design_full <= 0) di->charge_design_full = bq27xxx_battery_read_dcap(di); } if ((di->cache.capacity != cache.capacity) || - (di->cache.flags != cache.flags)) + (di->cache.flags != cache.flags) || + (di->last_status.intval != status.intval)) { + di->last_status.intval = status.intval; power_supply_changed(di->bat); + } if (memcmp(&di->cache, &cache, sizeof(cache)) != 0) di->cache = cache; diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index e3322dad9c85..7c8d65414a70 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -2,6 +2,8 @@ #ifndef __LINUX_BQ27X00_BATTERY_H__ #define __LINUX_BQ27X00_BATTERY_H__ +#include + enum bq27xxx_chip { BQ27000 = 1, /* bq27000, bq27200 */ BQ27010, /* bq27010, bq27210 */ @@ -70,6 +72,7 @@ struct bq27xxx_device_info { int charge_design_full; bool removed; unsigned long last_update; + union power_supply_propval last_status; struct delayed_work work; struct power_supply *bat; struct list_head list; -- cgit v1.2.3 From 19b8766459c41c6f318f8a548cc1c66dffd18363 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 20 Apr 2023 16:06:03 +0100 Subject: firmware: arm_ffa: Fix FFA device names for logical partitions Each physical partition can provide multiple services each with UUID. Each such service can be presented as logical partition with a unique combination of VM ID and UUID. The number of distinct UUID in a system will be less than or equal to the number of logical partitions. However, currently it fails to register more than one logical partition or service within a physical partition as the device name contains only VM ID while both VM ID and UUID are maintained in the partition information. The kernel complains with the below message: | sysfs: cannot create duplicate filename '/devices/arm-ffa-8001' | CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc7 #8 | Hardware name: FVP Base RevC (DT) | Call trace: | dump_backtrace+0xf8/0x118 | show_stack+0x18/0x24 | dump_stack_lvl+0x50/0x68 | dump_stack+0x18/0x24 | sysfs_create_dir_ns+0xe0/0x13c | kobject_add_internal+0x220/0x3d4 | kobject_add+0x94/0x100 | device_add+0x144/0x5d8 | device_register+0x20/0x30 | ffa_device_register+0x88/0xd8 | ffa_setup_partitions+0x108/0x1b8 | ffa_init+0x2ec/0x3a4 | do_one_initcall+0xcc/0x240 | do_initcall_level+0x8c/0xac | do_initcalls+0x54/0x94 | do_basic_setup+0x1c/0x28 | kernel_init_freeable+0x100/0x16c | kernel_init+0x20/0x1a0 | ret_from_fork+0x10/0x20 | kobject_add_internal failed for arm-ffa-8001 with -EEXIST, don't try to | register things with the same name in the same directory. | arm_ffa arm-ffa: unable to register device arm-ffa-8001 err=-17 | ARM FF-A: ffa_setup_partitions: failed to register partition ID 0x8001 By virtue of being random enough to avoid collisions when generated in a distributed system, there is no way to compress UUID keys to the number of bits required to identify each. We can eliminate '-' in the name but it is not worth eliminating 4 bytes and add unnecessary logic for doing that. Also v1.0 doesn't provide the UUID of the partitions which makes it hard to use the same for the device name. So to keep it simple, let us alloc an ID using ida_alloc() and append the same to "arm-ffa" to make up a unique device name. 
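Illustrative only (assuming these are the first FF-A devices registered, and with hypothetical uuid_a/uuid_b/ops), two services behind the same partition ID now come up with distinct names:

	/* Same vm_id, different service UUIDs: no sysfs name collision. */
	ffa_device_register(&uuid_a, 0x8001, ops);	/* "arm-ffa-1" */
	ffa_device_register(&uuid_b, 0x8001, ops);	/* "arm-ffa-2" */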
Also stash the id value in ffa_dev to help freeing the ID later when the device is destroyed. Fixes: e781858488b9 ("firmware: arm_ffa: Add initial FFA bus support for device enumeration") Reported-by: Lucian Paul-Trifu Link: https://lore.kernel.org/r/20230419-ffa_fixes_6-4-v2-3-d9108e43a176@arm.com Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/bus.c | 16 +++++++++++++--- include/linux/arm_ffa.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c index 36bd5423c2f0..2b8bfcd010f5 100644 --- a/drivers/firmware/arm_ffa/bus.c +++ b/drivers/firmware/arm_ffa/bus.c @@ -15,6 +15,8 @@ #include "common.h" +static DEFINE_IDA(ffa_bus_id); + static int ffa_device_match(struct device *dev, struct device_driver *drv) { const struct ffa_device_id *id_table; @@ -131,6 +133,7 @@ static void ffa_release_device(struct device *dev) { struct ffa_device *ffa_dev = to_ffa_dev(dev); + ida_free(&ffa_bus_id, ffa_dev->id); kfree(ffa_dev); } @@ -171,18 +174,24 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id, const struct ffa_ops *ops) { - int ret; + int id, ret; struct device *dev; struct ffa_device *ffa_dev; + id = ida_alloc_min(&ffa_bus_id, 1, GFP_KERNEL); + if (id < 0) + return NULL; + ffa_dev = kzalloc(sizeof(*ffa_dev), GFP_KERNEL); - if (!ffa_dev) + if (!ffa_dev) { + ida_free(&ffa_bus_id, id); return NULL; + } dev = &ffa_dev->dev; dev->bus = &ffa_bus_type; dev->release = ffa_release_device; - dev_set_name(&ffa_dev->dev, "arm-ffa-%04x", vm_id); + dev_set_name(&ffa_dev->dev, "arm-ffa-%d", id); ffa_dev->vm_id = vm_id; ffa_dev->ops = ops; @@ -218,4 +227,5 @@ void arm_ffa_bus_exit(void) { ffa_devices_unregister(); bus_unregister(&ffa_bus_type); + ida_destroy(&ffa_bus_id); } diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index c87aeecaa9b2..583fe3b49a49 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -96,6 +96,7 @@ /* FFA Bus/Device/Driver related */ struct ffa_device { + u32 id; int vm_id; bool mode_32bit; uuid_t uuid; -- cgit v1.2.3 From 111cd11bbc54850f24191c52ff217da88a5e639b Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 8 May 2023 09:58:50 +0200 Subject: sched/cpuset: Bring back cpuset_mutex Turns out percpu_cpuset_rwsem - commit 1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem") - wasn't such a brilliant idea, as it has been reported to cause slowdowns in workloads that need to change cpuset configuration frequently and it is also not implementing priority inheritance (which causes troubles with realtime workloads). Convert percpu_cpuset_rwsem back to regular cpuset_mutex. Also grab it only for SCHED_DEADLINE tasks (other policies don't care about stable cpusets anyway). 
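The scheduler-side pattern this enables looks roughly as follows (a condensed sketch of the __sched_setscheduler() flow, not the exact hunk):

	/* Only SCHED_DEADLINE admission control needs the cpuset topology
	 * to stay stable, so take the new mutex just for that case.
	 */
	cpuset_locked = dl_policy(policy);
	if (cpuset_locked)
		cpuset_lock();

	/* ... validate and apply the new policy/parameters ... */

	if (cpuset_locked)
		cpuset_unlock();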
Signed-off-by: Juri Lelli Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 8 +-- kernel/cgroup/cpuset.c | 159 +++++++++++++++++++++++++------------------------ kernel/sched/core.c | 22 ++++--- 3 files changed, 99 insertions(+), 90 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 980b76a1237e..f90e6325d707 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -71,8 +71,8 @@ extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); extern void cpuset_wait_for_hotplug(void); -extern void cpuset_read_lock(void); -extern void cpuset_read_unlock(void); +extern void cpuset_lock(void); +extern void cpuset_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -189,8 +189,8 @@ static inline void cpuset_update_active_cpus(void) static inline void cpuset_wait_for_hotplug(void) { } -static inline void cpuset_read_lock(void) { } -static inline void cpuset_read_unlock(void) { } +static inline void cpuset_lock(void) { } +static inline void cpuset_unlock(void) { } static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 428ab46291e2..041c0809adaf 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -366,22 +366,23 @@ static struct cpuset top_cpuset = { if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) /* - * There are two global locks guarding cpuset structures - cpuset_rwsem and + * There are two global locks guarding cpuset structures - cpuset_mutex and * callback_lock. We also require taking task_lock() when dereferencing a * task's cpuset pointer. See "The task_lock() exception", at the end of this - * comment. The cpuset code uses only cpuset_rwsem write lock. Other - * kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to - * prevent change to cpuset structures. + * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems + * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset + * structures. Note that cpuset_mutex needs to be a mutex as it is used in + * paths that rely on priority inheritance (e.g. scheduler - on RT) for + * correctness. * * A task must hold both locks to modify cpusets. If a task holds - * cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it - * is the only task able to also acquire callback_lock and be able to - * modify cpusets. It can perform various checks on the cpuset structure - * first, knowing nothing will change. It can also allocate memory while - * just holding cpuset_rwsem. While it is performing these checks, various - * callback routines can briefly acquire callback_lock to query cpusets. - * Once it is ready to make the changes, it takes callback_lock, blocking - * everyone else. + * cpuset_mutex, it blocks others, ensuring that it is the only task able to + * also acquire callback_lock and be able to modify cpusets. It can perform + * various checks on the cpuset structure first, knowing nothing will change. + * It can also allocate memory while just holding cpuset_mutex. While it is + * performing these checks, various callback routines can briefly acquire + * callback_lock to query cpusets. 
Once it is ready to make the changes, it + * takes callback_lock, blocking everyone else. * * Calls to the kernel memory allocator can not be made while holding * callback_lock, as that would risk double tripping on callback_lock @@ -403,16 +404,16 @@ static struct cpuset top_cpuset = { * guidelines for accessing subsystem state in kernel/cgroup.c */ -DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem); +static DEFINE_MUTEX(cpuset_mutex); -void cpuset_read_lock(void) +void cpuset_lock(void) { - percpu_down_read(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); } -void cpuset_read_unlock(void) +void cpuset_unlock(void) { - percpu_up_read(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } static DEFINE_SPINLOCK(callback_lock); @@ -496,7 +497,7 @@ static inline bool partition_is_populated(struct cpuset *cs, * One way or another, we guarantee to return some non-empty subset * of cpu_online_mask. * - * Call with callback_lock or cpuset_rwsem held. + * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_cpus(struct task_struct *tsk, struct cpumask *pmask) @@ -538,7 +539,7 @@ out_unlock: * One way or another, we guarantee to return some non-empty subset * of node_states[N_MEMORY]. * - * Call with callback_lock or cpuset_rwsem held. + * Call with callback_lock or cpuset_mutex held. */ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) { @@ -550,7 +551,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) /* * update task's spread flag if cpuset's page/slab spread flag is set * - * Call with callback_lock or cpuset_rwsem held. The check can be skipped + * Call with callback_lock or cpuset_mutex held. The check can be skipped * if on default hierarchy. */ static void cpuset_update_task_spread_flags(struct cpuset *cs, @@ -575,7 +576,7 @@ static void cpuset_update_task_spread_flags(struct cpuset *cs, * * One cpuset is a subset of another if all its allowed CPUs and * Memory Nodes are a subset of the other, and its exclusive flags - * are only set if the other's are set. Call holding cpuset_rwsem. + * are only set if the other's are set. Call holding cpuset_mutex. */ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) @@ -713,7 +714,7 @@ out: * If we replaced the flag and mask values of the current cpuset * (cur) with those values in the trial cpuset (trial), would * our various subset and exclusive rules still be valid? Presumes - * cpuset_rwsem held. + * cpuset_mutex held. * * 'cur' is the address of an actual, in-use cpuset. Operations * such as list traversal that depend on the actual address of the @@ -829,7 +830,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, rcu_read_unlock(); } -/* Must be called with cpuset_rwsem held. */ +/* Must be called with cpuset_mutex held. */ static inline int nr_cpusets(void) { /* jump label reference count + the top-level cpuset */ @@ -855,7 +856,7 @@ static inline int nr_cpusets(void) * domains when operating in the severe memory shortage situations * that could cause allocation failures below. * - * Must be called with cpuset_rwsem held. + * Must be called with cpuset_mutex held. 
* * The three key local variables below are: * cp - cpuset pointer, used (together with pos_css) to perform a @@ -1084,7 +1085,7 @@ static void dl_rebuild_rd_accounting(void) struct cpuset *cs = NULL; struct cgroup_subsys_state *pos_css; - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); lockdep_assert_cpus_held(); lockdep_assert_held(&sched_domains_mutex); @@ -1134,7 +1135,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], * 'cpus' is removed, then call this routine to rebuild the * scheduler's dynamic sched domains. * - * Call with cpuset_rwsem held. Takes cpus_read_lock(). + * Call with cpuset_mutex held. Takes cpus_read_lock(). */ static void rebuild_sched_domains_locked(void) { @@ -1145,7 +1146,7 @@ static void rebuild_sched_domains_locked(void) int ndoms; lockdep_assert_cpus_held(); - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * If we have raced with CPU hotplug, return early to avoid @@ -1196,9 +1197,9 @@ static void rebuild_sched_domains_locked(void) void rebuild_sched_domains(void) { cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); rebuild_sched_domains_locked(); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); } @@ -1208,7 +1209,7 @@ void rebuild_sched_domains(void) * @new_cpus: the temp variable for the new effective_cpus mask * * Iterate through each task of @cs updating its cpus_allowed to the - * effective cpuset's. As this function is called with cpuset_rwsem held, + * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask() * is used instead of effective_cpus to make sure all offline CPUs are also * included as hotplug code won't update cpumasks for tasks in top_cpuset. @@ -1322,7 +1323,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, int old_prs, new_prs; int part_error = PERR_NONE; /* Partition error? */ - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * The parent must be a partition root. @@ -1545,7 +1546,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, * * On legacy hierarchy, effective_cpus will be the same with cpu_allowed. * - * Called with cpuset_rwsem held + * Called with cpuset_mutex held */ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, bool force) @@ -1705,7 +1706,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct cpuset *sibling; struct cgroup_subsys_state *pos_css; - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); /* * Check all its siblings and call update_cpumasks_hier() @@ -1955,12 +1956,12 @@ static void *cpuset_being_rebound; * @cs: the cpuset in which each task's mems_allowed mask needs to be changed * * Iterate through each task of @cs updating its mems_allowed to the - * effective cpuset's. As this function is called with cpuset_rwsem held, + * effective cpuset's. As this function is called with cpuset_mutex held, * cpuset membership stays stable. */ static void update_tasks_nodemask(struct cpuset *cs) { - static nodemask_t newmems; /* protected by cpuset_rwsem */ + static nodemask_t newmems; /* protected by cpuset_mutex */ struct css_task_iter it; struct task_struct *task; @@ -1973,7 +1974,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * take while holding tasklist_lock. 
Forks can happen - the * mpol_dup() cpuset_being_rebound check will catch such forks, * and rebind their vma mempolicies too. Because we still hold - * the global cpuset_rwsem, we know that no other rebind effort + * the global cpuset_mutex, we know that no other rebind effort * will be contending for the global variable cpuset_being_rebound. * It's ok if we rebind the same mm twice; mpol_rebind_mm() * is idempotent. Also migrate pages in each mm to new nodes. @@ -2019,7 +2020,7 @@ static void update_tasks_nodemask(struct cpuset *cs) * * On legacy hierarchy, effective_mems will be the same with mems_allowed. * - * Called with cpuset_rwsem held + * Called with cpuset_mutex held */ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) { @@ -2072,7 +2073,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) * mempolicies and if the cpuset is marked 'memory_migrate', * migrate the tasks pages to the new memory. * - * Call with cpuset_rwsem held. May take callback_lock during call. + * Call with cpuset_mutex held. May take callback_lock during call. * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * lock each such tasks mm->mmap_lock, scan its vma's and rebind * their mempolicies to the cpusets new mems_allowed. @@ -2164,7 +2165,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) * @cs: the cpuset in which each task's spread flags needs to be changed * * Iterate through each task of @cs updating its spread flags. As this - * function is called with cpuset_rwsem held, cpuset membership stays + * function is called with cpuset_mutex held, cpuset membership stays * stable. */ static void update_tasks_flags(struct cpuset *cs) @@ -2184,7 +2185,7 @@ static void update_tasks_flags(struct cpuset *cs) * cs: the cpuset to update * turning_on: whether the flag is being set or cleared * - * Call with cpuset_rwsem held. + * Call with cpuset_mutex held. */ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, @@ -2234,7 +2235,7 @@ out: * @new_prs: new partition root state * Return: 0 if successful, != 0 if error * - * Call with cpuset_rwsem held. + * Call with cpuset_mutex held. */ static int update_prstate(struct cpuset *cs, int new_prs) { @@ -2472,7 +2473,7 @@ static int cpuset_can_attach_check(struct cpuset *cs) return 0; } -/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ +/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; @@ -2484,7 +2485,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); cs = css_cs(css); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); @@ -2506,7 +2507,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) */ cs->attach_in_progress++; out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); return ret; } @@ -2518,15 +2519,15 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* - * Protected by cpuset_rwsem. 
cpus_attach is used only by cpuset_attach_task() + * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach_task() * but we can't allocate it dynamically there. Define it global and * allocate from cpuset_init(). */ @@ -2535,7 +2536,7 @@ static nodemask_t cpuset_attach_nodemask_to; static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) { - percpu_rwsem_assert_held(&cpuset_rwsem); + lockdep_assert_held(&cpuset_mutex); if (cs != &top_cpuset) guarantee_online_cpus(task, cpus_attach); @@ -2565,7 +2566,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cs = css_cs(css); lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); @@ -2626,7 +2627,7 @@ out: if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* The various types of files and directories in a cpuset file system */ @@ -2658,7 +2659,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = 0; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; @@ -2694,7 +2695,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } @@ -2707,7 +2708,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = -ENODEV; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -2720,7 +2721,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return retval; } @@ -2753,7 +2754,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, * operation like this one can lead to a deadlock through kernfs * active_ref protection. Let's break the protection. Losing the * protection is okay as we check whether @cs is online after - * grabbing cpuset_rwsem anyway. This only happens on the legacy + * grabbing cpuset_mutex anyway. This only happens on the legacy * hierarchies. 
*/ css_get(&cs->css); @@ -2761,7 +2762,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, flush_work(&cpuset_hotplug_work); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -2785,7 +2786,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_cpuset(trialcs); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); @@ -2933,13 +2934,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, css_get(&cs->css); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; retval = update_prstate(cs, val); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); css_put(&cs->css); return retval ?: nbytes; @@ -3156,7 +3157,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) return 0; cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) @@ -3207,7 +3208,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); return 0; } @@ -3228,7 +3229,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) struct cpuset *cs = css_cs(css); cpus_read_lock(); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); if (is_partition_valid(cs)) update_prstate(cs, 0); @@ -3247,7 +3248,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); cpus_read_unlock(); } @@ -3260,7 +3261,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) static void cpuset_bind(struct cgroup_subsys_state *root_css) { - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { @@ -3273,7 +3274,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) } spin_unlock_irq(&callback_lock); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* @@ -3294,7 +3295,7 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) return 0; lockdep_assert_held(&cgroup_mutex); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* Check to see if task is allowed in the cpuset */ ret = cpuset_can_attach_check(cs); @@ -3315,7 +3316,7 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) */ cs->attach_in_progress++; out_unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); return ret; } @@ -3331,11 +3332,11 @@ static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) if (same_cs) return; - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /* @@ -3363,7 +3364,7 @@ static void cpuset_fork(struct task_struct *task) } /* CLONE_INTO_CGROUP */ - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); cpuset_attach_task(cs, task); @@ -3371,7 +3372,7 @@ static void 
cpuset_fork(struct task_struct *task) if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } struct cgroup_subsys cpuset_cgrp_subsys = { @@ -3472,7 +3473,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); /* * Move tasks to the nearest ancestor with execution resources, @@ -3482,7 +3483,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, if (is_empty) remove_tasks_in_empty_cpuset(cs); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); } static void @@ -3533,14 +3534,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* * We have raced with task attaching. We wait until attaching * is finished, so we won't attach a task to an empty cpuset. */ if (cs->attach_in_progress) { - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); goto retry; } @@ -3637,7 +3638,7 @@ update_tasks: cpus_updated, mems_updated); unlock: - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); } /** @@ -3667,7 +3668,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (on_dfl && !alloc_cpumasks(NULL, &tmp)) ptmp = &tmp; - percpu_down_write(&cpuset_rwsem); + mutex_lock(&cpuset_mutex); /* fetch the available cpus/mems and find out which changed how */ cpumask_copy(&new_cpus, cpu_active_mask); @@ -3724,7 +3725,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) update_tasks_nodemask(&top_cpuset); } - percpu_up_write(&cpuset_rwsem); + mutex_unlock(&cpuset_mutex); /* if cpus or mems changed, we need to propagate to descendants */ if (cpus_updated || mems_updated) { @@ -4155,7 +4156,7 @@ void __cpuset_memory_pressure_bump(void) * - Used for /proc//cpuset. * - No need to task_lock(tsk) on this tsk->cpuset reference, as it * doesn't really matter if tsk->cpuset changes after we read it, - * and we take cpuset_rwsem, keeping cpuset_attach() from changing it + * and we take cpuset_mutex, keeping cpuset_attach() from changing it * anyway. */ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 944c3ae39861..d826bec1c522 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7590,6 +7590,7 @@ static int __sched_setscheduler(struct task_struct *p, int reset_on_fork; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; struct rq *rq; + bool cpuset_locked = false; /* The pi code expects interrupts enabled */ BUG_ON(pi && in_interrupt()); @@ -7639,8 +7640,14 @@ recheck: return retval; } - if (pi) - cpuset_read_lock(); + /* + * SCHED_DEADLINE bandwidth accounting relies on stable cpusets + * information. 
+ */ + if (dl_policy(policy) || dl_policy(p->policy)) { + cpuset_locked = true; + cpuset_lock(); + } /* * Make sure no PI-waiters arrive (or leave) while we are @@ -7716,8 +7723,8 @@ change: if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { policy = oldpolicy = -1; task_rq_unlock(rq, p, &rf); - if (pi) - cpuset_read_unlock(); + if (cpuset_locked) + cpuset_unlock(); goto recheck; } @@ -7784,7 +7791,8 @@ change: task_rq_unlock(rq, p, &rf); if (pi) { - cpuset_read_unlock(); + if (cpuset_locked) + cpuset_unlock(); rt_mutex_adjust_pi(p); } @@ -7796,8 +7804,8 @@ change: unlock: task_rq_unlock(rq, p, &rf); - if (pi) - cpuset_read_unlock(); + if (cpuset_locked) + cpuset_unlock(); return retval; } -- cgit v1.2.3 From 6c24849f5515e4966d94fa5279bdff4acf2e9489 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 8 May 2023 09:58:51 +0200 Subject: sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets Qais reported that iterating over all tasks when rebuilding root domains for finding out which ones are DEADLINE and need their bandwidth correctly restored on such root domains can be a costly operation (10+ ms delays on suspend-resume). To fix the problem keep track of the number of DEADLINE tasks belonging to each cpuset and then use this information (followup patch) to only perform the above iteration if DEADLINE tasks are actually present in the cpuset for which a corresponding root domain is being rebuilt. Reported-by: Qais Yousef Link: https://lore.kernel.org/lkml/20230206221428.2125324-1-qyousef@layalina.io/ Signed-off-by: Juri Lelli Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 4 ++++ kernel/cgroup/cgroup.c | 4 ++++ kernel/cgroup/cpuset.c | 25 +++++++++++++++++++++++++ kernel/sched/deadline.c | 14 ++++++++++++++ 4 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index f90e6325d707..d629094fac6e 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -71,6 +71,8 @@ extern void cpuset_init_smp(void); extern void cpuset_force_rebuild(void); extern void cpuset_update_active_cpus(void); extern void cpuset_wait_for_hotplug(void); +extern void inc_dl_tasks_cs(struct task_struct *task); +extern void dec_dl_tasks_cs(struct task_struct *task); extern void cpuset_lock(void); extern void cpuset_unlock(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); @@ -189,6 +191,8 @@ static inline void cpuset_update_active_cpus(void) static inline void cpuset_wait_for_hotplug(void) { } +static inline void inc_dl_tasks_cs(struct task_struct *task) { } +static inline void dec_dl_tasks_cs(struct task_struct *task) { } static inline void cpuset_lock(void) { } static inline void cpuset_unlock(void) { } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 625d7483951c..9d809191a54f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include @@ -6683,6 +6684,9 @@ void cgroup_exit(struct task_struct *tsk) list_add_tail(&tsk->cg_list, &cset->dying_tasks); cset->nr_tasks--; + if (dl_task(tsk)) + dec_dl_tasks_cs(tsk); + WARN_ON_ONCE(cgroup_task_frozen(tsk)); if (unlikely(!(tsk->flags & PF_KTHREAD) && test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags))) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 041c0809adaf..ca195ff8b298 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -193,6 +193,12 @@ struct cpuset { int use_parent_ecpus; int 
child_ecpus_count; + /* + * number of SCHED_DEADLINE tasks attached to this cpuset, so that we + * know when to rebuild associated root domain bandwidth information. + */ + int nr_deadline_tasks; + /* Invalid partition error code, not lock protected */ enum prs_errcode prs_err; @@ -245,6 +251,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs) return css_cs(cs->css.parent); } +void inc_dl_tasks_cs(struct task_struct *p) +{ + struct cpuset *cs = task_cs(p); + + cs->nr_deadline_tasks++; +} + +void dec_dl_tasks_cs(struct task_struct *p) +{ + struct cpuset *cs = task_cs(p); + + cs->nr_deadline_tasks--; +} + /* bits in struct cpuset flags field */ typedef enum { CS_ONLINE, @@ -2499,6 +2519,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) ret = security_task_setscheduler(task); if (ret) goto out_unlock; + + if (dl_task(task)) { + cs->nr_deadline_tasks++; + cpuset_attach_old_cs->nr_deadline_tasks--; + } } /* diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 5a9a4b81c972..e11de074a6fd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -16,6 +16,8 @@ * Fabio Checconi */ +#include + /* * Default limits for DL period; on the top end we guard against small util * tasks still getting ridiculously long effective runtimes, on the bottom end we @@ -2596,6 +2598,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) if (task_on_rq_queued(p) && p->dl.dl_runtime) task_non_contending(p); + /* + * In case a task is setscheduled out from SCHED_DEADLINE we need to + * keep track of that on its cpuset (for correct bandwidth tracking). + */ + dec_dl_tasks_cs(p); + if (!task_on_rq_queued(p)) { /* * Inactive timer is armed. However, p is leaving DEADLINE and @@ -2636,6 +2644,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) put_task_struct(p); + /* + * In case a task is setscheduled to SCHED_DEADLINE we need to keep + * track of that on its cpuset (for correct bandwidth tracking). + */ + inc_dl_tasks_cs(p); + /* If p is not queued we will update its parameters at next wakeup. */ if (!task_on_rq_queued(p)) { add_rq_bw(&p->dl, &rq->dl); -- cgit v1.2.3 From 85989106feb734437e2d598b639991b9185a43a6 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 8 May 2023 09:58:53 +0200 Subject: sched/deadline: Create DL BW alloc, free & check overflow interface While moving a set of tasks between exclusive cpusets, cpuset_can_attach() -> task_can_attach() calls dl_cpu_busy(..., p) for DL BW overflow checking and per-task DL BW allocation on the destination root_domain for the DL tasks in this set. This approach has the issue of not freeing already allocated DL BW in the following error cases: (1) The set of tasks includes multiple DL tasks and DL BW overflow checking fails for one of the subsequent DL tasks. (2) Another controller next to the cpuset controller which is attached to the same cgroup fails in its can_attach(). To address this problem rework dl_cpu_busy(): (1) Split it into dl_bw_check_overflow() & dl_bw_alloc() and add a dedicated dl_bw_free(). (2) dl_bw_alloc() & dl_bw_free() take a `u64 dl_bw` parameter instead of a `struct task_struct *p` used in dl_cpu_busy(). This allows to allocate DL BW for a set of tasks too rather than only for a single task. 
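For illustration only (not part of this patch), a caller migrating a whole
set of tasks could drive the new interface roughly as follows; the task
list, the "migrate_node" member, "dest_cpu" and the surrounding error
handling are hypothetical names, not taken from this series:

    /*
     * Sketch: sum the DL bandwidth of the tasks being moved, reserve it
     * once on a destination CPU's root_domain, and give it back if a
     * later step fails.
     */
    u64 sum_dl_bw = 0;
    struct task_struct *p;
    int ret;

    list_for_each_entry(p, &tasks, migrate_node)
            if (dl_task(p))
                    sum_dl_bw += p->dl.dl_bw;

    if (sum_dl_bw) {
            ret = dl_bw_alloc(dest_cpu, sum_dl_bw);
            if (ret)
                    return ret;             /* -EBUSY on overflow */
    }

    /* ... perform the migration; if a later step fails: */
    if (sum_dl_bw)
            dl_bw_free(dest_cpu, sum_dl_bw);
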
Signed-off-by: Dietmar Eggemann Signed-off-by: Juri Lelli Signed-off-by: Tejun Heo --- include/linux/sched.h | 2 ++ kernel/sched/core.c | 4 ++-- kernel/sched/deadline.c | 53 +++++++++++++++++++++++++++++++++++++------------ kernel/sched/sched.h | 2 +- 4 files changed, 45 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eed5d65b8d1f..0bee06542450 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1853,6 +1853,8 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); +extern int dl_bw_alloc(int cpu, u64 dl_bw); +extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d826bec1c522..df659892d7d5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9319,7 +9319,7 @@ int task_can_attach(struct task_struct *p, if (unlikely(cpu >= nr_cpu_ids)) return -EINVAL; - ret = dl_cpu_busy(cpu, p); + ret = dl_bw_alloc(cpu, p->dl.dl_bw); } out: @@ -9604,7 +9604,7 @@ static void cpuset_cpu_active(void) static int cpuset_cpu_inactive(unsigned int cpu) { if (!cpuhp_tasks_frozen) { - int ret = dl_cpu_busy(cpu, NULL); + int ret = dl_bw_check_overflow(cpu); if (ret) return ret; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e11de074a6fd..166c3e6eae61 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -3058,26 +3058,38 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, return ret; } -int dl_cpu_busy(int cpu, struct task_struct *p) +enum dl_bw_request { + dl_bw_req_check_overflow = 0, + dl_bw_req_alloc, + dl_bw_req_free +}; + +static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw) { - unsigned long flags, cap; + unsigned long flags; struct dl_bw *dl_b; - bool overflow; + bool overflow = 0; rcu_read_lock_sched(); dl_b = dl_bw_of(cpu); raw_spin_lock_irqsave(&dl_b->lock, flags); - cap = dl_bw_capacity(cpu); - overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0); - if (!overflow && p) { - /* - * We reserve space for this task in the destination - * root_domain, as we can't fail after this point. - * We will free resources in the source root_domain - * later on (see set_cpus_allowed_dl()). - */ - __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu)); + if (req == dl_bw_req_free) { + __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu)); + } else { + unsigned long cap = dl_bw_capacity(cpu); + + overflow = __dl_overflow(dl_b, cap, 0, dl_bw); + + if (req == dl_bw_req_alloc && !overflow) { + /* + * We reserve space in the destination + * root_domain, as we can't fail after this point. + * We will free resources in the source root_domain + * later on (see set_cpus_allowed_dl()). + */ + __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu)); + } } raw_spin_unlock_irqrestore(&dl_b->lock, flags); @@ -3085,6 +3097,21 @@ int dl_cpu_busy(int cpu, struct task_struct *p) return overflow ? 
-EBUSY : 0; } + +int dl_bw_check_overflow(int cpu) +{ + return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0); +} + +int dl_bw_alloc(int cpu, u64 dl_bw) +{ + return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw); +} + +void dl_bw_free(int cpu, u64 dl_bw) +{ + dl_bw_manage(dl_bw_req_free, cpu, dl_bw); +} #endif #ifdef CONFIG_SCHED_DEBUG diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ec7b3e0a2b20..0ad712811e35 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -330,7 +330,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); extern bool __checkparam_dl(const struct sched_attr *attr); extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); -extern int dl_cpu_busy(int cpu, struct task_struct *p); +extern int dl_bw_check_overflow(int cpu); #ifdef CONFIG_CGROUP_SCHED -- cgit v1.2.3 From 2ef269ef1ac006acf974793d975539244d77b28f Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 8 May 2023 09:58:54 +0200 Subject: cgroup/cpuset: Free DL BW in case can_attach() fails cpuset_can_attach() can fail. Postpone DL BW allocation until all tasks have been checked. DL BW is not allocated per-task but as a sum over all DL tasks migrating. If multiple controllers are attached to the cgroup next to the cpuset controller a non-cpuset can_attach() can fail. In this case free DL BW in cpuset_cancel_attach(). Finally, update cpuset DL task count (nr_deadline_tasks) only in cpuset_attach(). Suggested-by: Waiman Long Signed-off-by: Dietmar Eggemann Signed-off-by: Juri Lelli Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/sched.h | 2 +- kernel/cgroup/cpuset.c | 53 +++++++++++++++++++++++++++++++++++++++++++++----- kernel/sched/core.c | 17 ++-------------- 3 files changed, 51 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0bee06542450..2553918f0b61 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1852,7 +1852,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); +extern int task_can_attach(struct task_struct *p); extern int dl_bw_alloc(int cpu, u64 dl_bw); extern void dl_bw_free(int cpu, u64 dl_bw); #ifdef CONFIG_SMP diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index b7168970fff2..2c76fcd9f0bc 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -198,6 +198,8 @@ struct cpuset { * know when to rebuild associated root domain bandwidth information. 
*/ int nr_deadline_tasks; + int nr_migrate_dl_tasks; + u64 sum_migrate_dl_bw; /* Invalid partition error code, not lock protected */ enum prs_errcode prs_err; @@ -2496,16 +2498,23 @@ static int cpuset_can_attach_check(struct cpuset *cs) return 0; } +static void reset_migrate_dl_data(struct cpuset *cs) +{ + cs->nr_migrate_dl_tasks = 0; + cs->sum_migrate_dl_bw = 0; +} + /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ static int cpuset_can_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; - struct cpuset *cs; + struct cpuset *cs, *oldcs; struct task_struct *task; int ret; /* used later by cpuset_attach() */ cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); + oldcs = cpuset_attach_old_cs; cs = css_cs(css); mutex_lock(&cpuset_mutex); @@ -2516,7 +2525,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) goto out_unlock; cgroup_taskset_for_each(task, css, tset) { - ret = task_can_attach(task, cs->effective_cpus); + ret = task_can_attach(task); if (ret) goto out_unlock; ret = security_task_setscheduler(task); @@ -2524,11 +2533,31 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) goto out_unlock; if (dl_task(task)) { - cs->nr_deadline_tasks++; - cpuset_attach_old_cs->nr_deadline_tasks--; + cs->nr_migrate_dl_tasks++; + cs->sum_migrate_dl_bw += task->dl.dl_bw; } } + if (!cs->nr_migrate_dl_tasks) + goto out_success; + + if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { + int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); + + if (unlikely(cpu >= nr_cpu_ids)) { + reset_migrate_dl_data(cs); + ret = -EINVAL; + goto out_unlock; + } + + ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); + if (ret) { + reset_migrate_dl_data(cs); + goto out_unlock; + } + } + +out_success: /* * Mark attach is in progress. This makes validate_change() fail * changes which zero cpus/mems_allowed. @@ -2551,6 +2580,14 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); + + if (cs->nr_migrate_dl_tasks) { + int cpu = cpumask_any(cs->effective_cpus); + + dl_bw_free(cpu, cs->sum_migrate_dl_bw); + reset_migrate_dl_data(cs); + } + mutex_unlock(&cpuset_mutex); } @@ -2651,6 +2688,12 @@ static void cpuset_attach(struct cgroup_taskset *tset) out: cs->old_mems_allowed = cpuset_attach_nodemask_to; + if (cs->nr_migrate_dl_tasks) { + cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; + oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; + reset_migrate_dl_data(cs); + } + cs->attach_in_progress--; if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); @@ -3330,7 +3373,7 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) if (ret) goto out_unlock; - ret = task_can_attach(task, cs->effective_cpus); + ret = task_can_attach(task); if (ret) goto out_unlock; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index df659892d7d5..ed0d7381b2ec 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9294,8 +9294,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur, return ret; } -int task_can_attach(struct task_struct *p, - const struct cpumask *cs_effective_cpus) +int task_can_attach(struct task_struct *p) { int ret = 0; @@ -9308,21 +9307,9 @@ int task_can_attach(struct task_struct *p, * success of set_cpus_allowed_ptr() on all attached tasks * before cpus_mask may be changed. 
*/ - if (p->flags & PF_NO_SETAFFINITY) { + if (p->flags & PF_NO_SETAFFINITY) ret = -EINVAL; - goto out; - } - - if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, - cs_effective_cpus)) { - int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus); - if (unlikely(cpu >= nr_cpu_ids)) - return -EINVAL; - ret = dl_bw_alloc(cpu, p->dl.dl_bw); - } - -out: return ret; } -- cgit v1.2.3 From b4cc979588ee94b179e28c6f3f5c2d6197ea6461 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Tue, 25 Apr 2023 00:29:36 +0200 Subject: platform/x86: wmi: Add kernel doc comments Add kernel doc comments useful for documenting the functions/structs used to interact with the WMI driver core. Signed-off-by: Armin Wolf Tested-by: Randy Dunlap Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20230424222939.208137-2-W_Armin@gmx.de Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 51 ++++++++++++++++++++++++++++++++++++++-------- include/linux/wmi.h | 41 ++++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index d81319a502ef..99af2cc03b0f 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -248,7 +248,9 @@ static acpi_status get_event_data(const struct wmi_block *wblock, struct acpi_bu * @wdev: A wmi bus device from a driver * @length: Required buffer size * - * Allocates memory needed for buffer, stores the buffer size in that memory + * Allocates memory needed for buffer, stores the buffer size in that memory. + * + * Return: 0 on success or a negative error code for failure. */ int set_required_buffer_size(struct wmi_device *wdev, u64 length) { @@ -269,7 +271,9 @@ EXPORT_SYMBOL_GPL(set_required_buffer_size); * @in: Buffer containing input for the method call * @out: Empty buffer to return the method results * - * Call an ACPI-WMI method + * Call an ACPI-WMI method, the caller must free @out. + * + * Return: acpi_status signaling success or error. */ acpi_status wmi_evaluate_method(const char *guid_string, u8 instance, u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out) @@ -294,7 +298,9 @@ EXPORT_SYMBOL_GPL(wmi_evaluate_method); * @in: Buffer containing input for the method call * @out: Empty buffer to return the method results * - * Call an ACPI-WMI method + * Call an ACPI-WMI method, the caller must free @out. + * + * Return: acpi_status signaling success or error. */ acpi_status wmidev_evaluate_method(struct wmi_device *wdev, u8 instance, u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out) @@ -411,7 +417,9 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance, * @instance: Instance index * @out: Empty buffer to return the contents of the data block to * - * Return the contents of an ACPI-WMI data block to a buffer + * Query a ACPI-WMI block, the caller must free @out. + * + * Return: ACPI object containing the content of the WMI block. */ acpi_status wmi_query_block(const char *guid_string, u8 instance, struct acpi_buffer *out) @@ -427,6 +435,15 @@ acpi_status wmi_query_block(const char *guid_string, u8 instance, } EXPORT_SYMBOL_GPL(wmi_query_block); +/** + * wmidev_block_query - Return contents of a WMI block + * @wdev: A wmi bus device from a driver + * @instance: Instance index + * + * Query an ACPI-WMI block, the caller must free the result. + * + * Return: ACPI object containing the content of the WMI block. 
+ */ union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance) { struct acpi_buffer out = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -445,7 +462,9 @@ EXPORT_SYMBOL_GPL(wmidev_block_query); * @instance: Instance index * @in: Buffer containing new values for the data block * - * Write the contents of the input buffer to an ACPI-WMI data block + * Write the contents of the input buffer to an ACPI-WMI data block. + * + * Return: acpi_status signaling success or error. */ acpi_status wmi_set_block(const char *guid_string, u8 instance, const struct acpi_buffer *in) @@ -555,6 +574,8 @@ static void wmi_notify_debug(u32 value, void *context) * @data: Data to be returned to handler when event is fired * * Register a handler for events sent to the ACPI-WMI mapper device. + * + * Return: acpi_status signaling success or error. */ acpi_status wmi_install_notify_handler(const char *guid, wmi_notify_handler handler, @@ -597,6 +618,8 @@ EXPORT_SYMBOL_GPL(wmi_install_notify_handler); * @guid: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba * * Unregister handler for events sent to the ACPI-WMI mapper device. + * + * Return: acpi_status signaling success or error. */ acpi_status wmi_remove_notify_handler(const char *guid) { @@ -641,9 +664,11 @@ EXPORT_SYMBOL_GPL(wmi_remove_notify_handler); * wmi_get_event_data - Get WMI data associated with an event * * @event: Event to find - * @out: Buffer to hold event data. out->pointer should be freed with kfree() + * @out: Buffer to hold event data + * + * Get extra data associated with an WMI event, the caller needs to free @out. * - * Returns extra data associated with an event in WMI. + * Return: acpi_status signaling success or error. */ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out) { @@ -664,7 +689,9 @@ EXPORT_SYMBOL_GPL(wmi_get_event_data); * wmi_has_guid - Check if a GUID is available * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba * - * Check if a given GUID is defined by _WDG + * Check if a given GUID is defined by _WDG. + * + * Return: True if GUID is available, false otherwise. */ bool wmi_has_guid(const char *guid_string) { @@ -678,7 +705,7 @@ EXPORT_SYMBOL_GPL(wmi_has_guid); * * Find the _UID of ACPI device associated with this WMI GUID. * - * Return: The ACPI _UID field value or NULL if the WMI GUID was not found + * Return: The ACPI _UID field value or NULL if the WMI GUID was not found. */ char *wmi_get_acpi_device_uid(const char *guid_string) { @@ -1454,6 +1481,12 @@ int __must_check __wmi_driver_register(struct wmi_driver *driver, } EXPORT_SYMBOL(__wmi_driver_register); +/** + * wmi_driver_unregister() - Unregister a WMI driver + * @driver: WMI driver to unregister + * + * Unregisters a WMI driver from the WMI bus. + */ void wmi_driver_unregister(struct wmi_driver *driver) { driver_unregister(&driver->driver); diff --git a/include/linux/wmi.h b/include/linux/wmi.h index b88d7b58e61e..c1a3bd4e4838 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -13,25 +13,44 @@ #include #include +/** + * struct wmi_device - WMI device structure + * @dev: Device associated with this WMI device + * @setable: True for devices implementing the Set Control Method + * + * This represents WMI devices discovered by the WMI driver core. 
+ */ struct wmi_device { struct device dev; - /* True for data blocks implementing the Set Control Method */ + /* private: used by the WMI driver core */ bool setable; }; -/* evaluate the ACPI method associated with this device */ extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, u8 instance, u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out); -/* Caller must kfree the result. */ extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); extern int set_required_buffer_size(struct wmi_device *wdev, u64 length); +/** + * struct wmi_driver - WMI driver structure + * @driver: Driver model structure + * @id_table: List of WMI GUIDs supported by this driver + * @no_notify_data: WMI events provide no event data + * @probe: Callback for device binding + * @remove: Callback for device unbinding + * @notify: Callback for receiving WMI events + * @filter_callback: Callback for filtering device IOCTLs + * + * This represents WMI drivers which handle WMI devices. + * @filter_callback is only necessary for drivers which + * want to set up a WMI IOCTL interface. + */ struct wmi_driver { struct device_driver driver; const struct wmi_device_id *id_table; @@ -47,8 +66,24 @@ struct wmi_driver { extern int __must_check __wmi_driver_register(struct wmi_driver *driver, struct module *owner); extern void wmi_driver_unregister(struct wmi_driver *driver); + +/** + * wmi_driver_register() - Helper macro to register a WMI driver + * @driver: wmi_driver struct + * + * Helper macro for registering a WMI driver. It automatically passes + * THIS_MODULE to the underlying function. + */ #define wmi_driver_register(driver) __wmi_driver_register((driver), THIS_MODULE) +/** + * module_wmi_driver() - Helper macro to register/unregister a WMI driver + * @__wmi_driver: wmi_driver struct + * + * Helper macro for WMI drivers which do not do anything special in module + * init/exit. This eliminates a lot of boilerplate. Each module may only + * use this macro once, and calling it replaces module_init() and module_exit(). + */ #define module_wmi_driver(__wmi_driver) \ module_driver(__wmi_driver, wmi_driver_register, \ wmi_driver_unregister) -- cgit v1.2.3 From 2a2b13ae50cf70e07b471301ff50299f31d81c1d Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 30 Apr 2023 22:31:52 +0200 Subject: platform/x86: wmi: Allow retrieving the number of WMI object instances Currently, the WMI driver core knows how many instances of a given WMI object exist, but WMI drivers cannot access this information. At the same time, some current and upcoming WMI drivers want to have access to this information. Add wmi_instance_count() and wmidev_instance_count() to allow WMI drivers to get the number of WMI object instances. 
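As an illustration (not part of this patch), a WMI driver could combine the
new helper with wmidev_block_query() roughly as below; the probe function
name and the per-instance handling are hypothetical:

    /* Sketch: walk every instance of the driver's WMI data block. */
    static int example_wmi_probe(struct wmi_device *wdev, const void *context)
    {
            u8 count = wmidev_instance_count(wdev);
            u8 i;

            for (i = 0; i < count; i++) {
                    union acpi_object *obj = wmidev_block_query(wdev, i);

                    if (!obj)
                            return -EIO;

                    /* ... inspect obj->type / obj->buffer.pointer ... */
                    kfree(obj);     /* wmidev_block_query() result must be freed */
            }

            return 0;
    }
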
Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20230430203153.5587-2-W_Armin@gmx.de Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/wmi.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 2 ++ include/linux/wmi.h | 2 ++ 3 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index c226dd4163a1..5b95d7aa5c2f 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -263,6 +263,47 @@ int set_required_buffer_size(struct wmi_device *wdev, u64 length) } EXPORT_SYMBOL_GPL(set_required_buffer_size); +/** + * wmi_instance_count - Get number of WMI object instances + * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba + * + * Get the number of WMI object instances. + * + * Returns: Number of WMI object instances or negative error code. + */ +int wmi_instance_count(const char *guid_string) +{ + struct wmi_block *wblock; + acpi_status status; + + status = find_guid(guid_string, &wblock); + if (ACPI_FAILURE(status)) { + if (status == AE_BAD_PARAMETER) + return -EINVAL; + + return -ENODEV; + } + + return wmidev_instance_count(&wblock->dev); +} +EXPORT_SYMBOL_GPL(wmi_instance_count); + +/** + * wmidev_instance_count - Get number of WMI object instances + * @wdev: A wmi bus device from a driver + * + * Get the number of WMI object instances. + * + * Returns: Number of WMI object instances. + */ +u8 wmidev_instance_count(struct wmi_device *wdev) +{ + struct wmi_block *wblock = container_of(wdev, struct wmi_block, dev); + + return wblock->gblock.instance_count; +} +EXPORT_SYMBOL_GPL(wmidev_instance_count); + /** * wmi_evaluate_method - Evaluate a WMI method (deprecated) * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7b71dd74baeb..5b9353f56928 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -414,6 +414,8 @@ extern bool acpi_is_pnp_device(struct acpi_device *); typedef void (*wmi_notify_handler) (u32 value, void *context); +int wmi_instance_count(const char *guid); + extern acpi_status wmi_evaluate_method(const char *guid, u8 instance, u32 method_id, const struct acpi_buffer *in, diff --git a/include/linux/wmi.h b/include/linux/wmi.h index c1a3bd4e4838..763bd382cf2d 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -35,6 +35,8 @@ extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); +u8 wmidev_instance_count(struct wmi_device *wdev); + extern int set_required_buffer_size(struct wmi_device *wdev, u64 length); /** -- cgit v1.2.3 From 0ac448e0d29d6ba978684b3fa2e3ac7294ec2475 Mon Sep 17 00:00:00 2001 From: Mike Pastore Date: Sun, 7 May 2023 02:35:19 -0500 Subject: PCI: Delay after FLR of Solidigm P44 Pro NVMe Prevent KVM hang when a Solidgm P44 Pro NVMe is passed through to a guest via IOMMU and the guest is subsequently rebooted. A similar issue was identified and patched by 51ba09452d11 ("PCI: Delay after FLR of Intel DC P3700 NVMe") and the same fix can be applied for this case. (Intel spun off their NAND and SSD business as Solidigm and sold it to SK Hynix in late 2021.) 
Link: https://lore.kernel.org/r/20230507073519.9737-1-mike@oobak.org Signed-off-by: Mike Pastore Signed-off-by: Bjorn Helgaas --- drivers/pci/quirks.c | 10 ++++++---- include/linux/pci_ids.h | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index f4e2a88729fd..c1239706eeaf 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3993,10 +3993,11 @@ static int nvme_disable_and_flr(struct pci_dev *dev, bool probe) } /* - * Intel DC P3700 NVMe controller will timeout waiting for ready status - * to change after NVMe enable if the driver starts interacting with the - * device too soon after FLR. A 250ms delay after FLR has heuristically - * proven to produce reliably working results for device assignment cases. + * Some NVMe controllers such as Intel DC P3700 and Solidigm P44 Pro will + * timeout waiting for ready status to change after NVMe enable if the driver + * starts interacting with the device too soon after FLR. A 250ms delay after + * FLR has heuristically proven to produce reliably working results for device + * assignment cases. */ static int delay_250ms_after_flr(struct pci_dev *dev, bool probe) { @@ -4083,6 +4084,7 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_SAMSUNG, 0xa804, nvme_disable_and_flr }, { PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr }, { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr }, + { PCI_VENDOR_ID_SOLIDIGM, 0xf1ac, delay_250ms_after_flr }, { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, reset_chelsio_generic_dev }, { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 45c3d62e616d..20c3403a62cd 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -158,6 +158,8 @@ #define PCI_VENDOR_ID_LOONGSON 0x0014 +#define PCI_VENDOR_ID_SOLIDIGM 0x025e + #define PCI_VENDOR_ID_TTTECH 0x0357 #define PCI_DEVICE_ID_TTTECH_MC322 0x000a -- cgit v1.2.3 From 1da82598cfc22f43fb0a3bd47774f7e886cc8b62 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Mar 2023 13:51:10 -0700 Subject: srcu: Remove extraneous parentheses from srcu_read_lock() etc. This commit removes extraneous parentheses from srcu_read_lock(), srcu_read_lock_nmisafe(), srcu_read_unlock(), and srcu_read_unlock_nmisafe(). Looks like someone was once a macro. Cc: Christoph Hellwig Tested-by: Sachin Sant Tested-by: "Zhang, Qiang1" Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 41c4b26fb1c1..eb92a50a4599 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -212,7 +212,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); - srcu_lock_acquire(&(ssp)->dep_map); + srcu_lock_acquire(&ssp->dep_map); return retval; } @@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp srcu_check_nmi_safety(ssp, true); retval = __srcu_read_lock_nmisafe(ssp); - rcu_lock_acquire(&(ssp)->dep_map); + rcu_lock_acquire(&ssp->dep_map); return retval; } @@ -284,7 +284,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) { WARN_ON_ONCE(idx & ~0x1); srcu_check_nmi_safety(ssp, false); - srcu_lock_release(&(ssp)->dep_map); + srcu_lock_release(&ssp->dep_map); __srcu_read_unlock(ssp, idx); } @@ -300,7 +300,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) { WARN_ON_ONCE(idx & ~0x1); srcu_check_nmi_safety(ssp, true); - rcu_lock_release(&(ssp)->dep_map); + rcu_lock_release(&ssp->dep_map); __srcu_read_unlock_nmisafe(ssp, idx); } -- cgit v1.2.3 From 7e3f926bf4538cb4988b3e3f8bc1cb4a603b2ef6 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 1 Feb 2023 16:09:54 +0100 Subject: rcu/kvfree: Eliminate k[v]free_rcu() single argument macro The kvfree_rcu() and kfree_rcu() APIs are hazardous in that if you forget the second argument, it works, but might sleep. This sleeping can be a correctness bug from atomic contexts, and even in non-atomic contexts it might introduce unacceptable latencies. This commit therefore removes the single-argument kvfree_rcu() and kfree_rcu() macros. Code that would have previously used these single-argument kvfree_rcu() and kfree_rcu() macros should instead use kvfree_rcu_mightsleep() or kfree_rcu_mightsleep(). [ paulmck: Apply Joel Fernandes feedback. ] Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) --- include/linux/rcupdate.h | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dcd2cf1e8326..744869ef930a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -957,9 +957,8 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * kfree_rcu() - kfree an object after a grace period. - * @ptr: pointer to kfree for both single- and double-argument invocations. - * @rhf: the name of the struct rcu_head within the type of @ptr, - * but only for double-argument invocations. + * @ptr: pointer to kfree for double-argument invocations. + * @rhf: the name of the struct rcu_head within the type of @ptr. * * Many rcu callbacks functions just call kfree() on the base structure. * These functions are trivial, but their size adds up, and furthermore @@ -984,26 +983,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * The BUILD_BUG_ON check must not involve any function calls, hence the * checks are done in macros here. */ -#define kfree_rcu(ptr, rhf...) kvfree_rcu(ptr, ## rhf) +#define kfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf) +#define kvfree_rcu(ptr, rhf) kvfree_rcu_arg_2(ptr, rhf) /** - * kvfree_rcu() - kvfree an object after a grace period. 
- * - * This macro consists of one or two arguments and it is - * based on whether an object is head-less or not. If it - * has a head then a semantic stays the same as it used - * to be before: - * - * kvfree_rcu(ptr, rhf); - * - * where @ptr is a pointer to kvfree(), @rhf is the name - * of the rcu_head structure within the type of @ptr. + * kfree_rcu_mightsleep() - kfree an object after a grace period. + * @ptr: pointer to kfree for single-argument invocations. * * When it comes to head-less variant, only one argument * is passed and that is just a pointer which has to be * freed after a grace period. Therefore the semantic is * - * kvfree_rcu(ptr); + * kfree_rcu_mightsleep(ptr); * * where @ptr is the pointer to be freed by kvfree(). * @@ -1012,13 +1003,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * annotation. Otherwise, please switch and embed the * rcu_head structure within the type of @ptr. */ -#define kvfree_rcu(...) KVFREE_GET_MACRO(__VA_ARGS__, \ - kvfree_rcu_arg_2, kvfree_rcu_arg_1)(__VA_ARGS__) - +#define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) -#define kfree_rcu_mightsleep(ptr) kvfree_rcu_mightsleep(ptr) -#define KVFREE_GET_MACRO(_1, _2, NAME, ...) NAME #define kvfree_rcu_arg_2(ptr, rhf) \ do { \ typeof (ptr) ___p = (ptr); \ -- cgit v1.2.3 From 212bc1ce618dd7c734920a68988fe4b473f09f40 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Tue, 9 May 2023 12:00:55 +0100 Subject: regmap-irq: Fix typo in documentation for .get_irq_reg() It refers to a non-existent "num_type_settings" value, which is an old name I'd used during development of config registers and later dropped because it wasn't very clear. The correct bound for the range is num_config_regs, which can be verified by checking the implementation. Signed-off-by: Aidan MacDonald Date: Tue, 9 May 2023 12:00:56 +0100 Subject: regmap-irq: Remove virtual registers No remaining users, and it's been replaced by config registers. 
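For context (not part of this patch), the config-register mechanism that
replaces the virtual registers looks roughly like the sketch below; the
register addresses, encodings and names here are made up. Chips that only
need the trigger type folded into their mask registers can instead set
type_in_mask and let the core apply regmap_irq_set_type_config_simple(),
as the follow-up "regmap-irq: Remove type registers" patch in this series
shows:

    #define EXAMPLE_EDGE_RISING     0x1     /* made-up register encodings */
    #define EXAMPLE_EDGE_FALLING    0x2

    static const unsigned int example_config_base[] = {
            0x40,                           /* made-up trigger-type bank */
    };

    static int example_set_type_config(unsigned int **buf, unsigned int type,
                                       const struct regmap_irq *irq_data,
                                       int idx, void *irq_drv_data)
    {
            switch (type) {
            case IRQ_TYPE_EDGE_RISING:
                    buf[0][idx] = EXAMPLE_EDGE_RISING;
                    break;
            case IRQ_TYPE_EDGE_FALLING:
                    buf[0][idx] = EXAMPLE_EDGE_FALLING;
                    break;
            default:
                    return -EINVAL;
            }

            return 0;
    }

    static const struct regmap_irq_chip example_chip = {
            .name                   = "example",
            .status_base            = 0x10,
            .mask_base              = 0x20,
            .config_base            = example_config_base,
            .num_config_bases       = ARRAY_SIZE(example_config_base),
            .num_config_regs        = 4,
            .set_type_config        = example_set_type_config,
            /* .irqs, .num_irqs, .num_regs, etc. omitted */
    };
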
Signed-off-by: Aidan MacDonald chip->num_virt_regs) { - for (i = 0; i < d->chip->num_virt_regs; i++) { - for (j = 0; j < d->chip->num_regs; j++) { - reg = d->get_irq_reg(d, d->chip->virt_reg_base[i], - j); - ret = regmap_write(map, reg, d->virt_buf[i][j]); - if (ret != 0) - dev_err(d->map->dev, - "Failed to write virt 0x%x: %d\n", - reg, ret); - } - } - } - for (i = 0; i < d->chip->num_config_bases; i++) { for (j = 0; j < d->chip->num_config_regs; j++) { reg = d->get_irq_reg(d, d->chip->config_base[i], j); @@ -320,13 +305,6 @@ static int regmap_irq_set_type(struct irq_data *data, unsigned int type) return -EINVAL; } - if (d->chip->set_type_virt) { - ret = d->chip->set_type_virt(d->virt_buf, type, data->hwirq, - reg); - if (ret) - return ret; - } - if (d->chip->set_type_config) { ret = d->chip->set_type_config(d->config_buf, type, irq_data, reg, d->chip->irq_drv_data); @@ -758,9 +736,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (chip->num_type_reg) dev_warn(map->dev, "type registers are deprecated; use config registers instead"); - if (chip->num_virt_regs || chip->virt_reg_base || chip->set_type_virt) - dev_warn(map->dev, "virtual registers are deprecated; use config registers instead"); - if (irq_base) { irq_base = irq_alloc_descs(irq_base, 0, chip->num_irqs, 0); if (irq_base < 0) { @@ -824,24 +799,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, goto err_alloc; } - if (chip->num_virt_regs) { - /* - * Create virt_buf[chip->num_extra_config_regs][chip->num_regs] - */ - d->virt_buf = kcalloc(chip->num_virt_regs, sizeof(*d->virt_buf), - GFP_KERNEL); - if (!d->virt_buf) - goto err_alloc; - - for (i = 0; i < chip->num_virt_regs; i++) { - d->virt_buf[i] = kcalloc(chip->num_regs, - sizeof(**d->virt_buf), - GFP_KERNEL); - if (!d->virt_buf[i]) - goto err_alloc; - } - } - if (chip->num_config_bases && chip->num_config_regs) { /* * Create config_buf[num_config_bases][num_config_regs] @@ -1063,11 +1020,6 @@ err_alloc: kfree(d->mask_buf); kfree(d->status_buf); kfree(d->status_reg_buf); - if (d->virt_buf) { - for (i = 0; i < chip->num_virt_regs; i++) - kfree(d->virt_buf[i]); - kfree(d->virt_buf); - } if (d->config_buf) { for (i = 0; i < chip->num_config_bases; i++) kfree(d->config_buf[i]); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 253f99fb282f..2ad0e3d77b95 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1544,8 +1544,6 @@ struct regmap_irq_chip_data; * @wake_base: Base address for wake enables. If zero unsupported. * @type_base: Base address for irq type. If zero unsupported. Deprecated, * use @config_base instead. - * @virt_reg_base: Base addresses for extra config regs. Deprecated, use - * @config_base instead. * @config_base: Base address for IRQ type config regs. If null unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. @@ -1586,9 +1584,6 @@ struct regmap_irq_chip_data; * * @num_type_reg: Number of type registers. Deprecated, use config registers * instead. - * @num_virt_regs: Number of non-standard irq configuration registers. - * If zero unsupported. Deprecated, use config registers - * instead. * @num_config_bases: Number of config base registers. * @num_config_regs: Number of config registers for each config base register. * @@ -1598,9 +1593,6 @@ struct regmap_irq_chip_data; * after handling the interrupts in regmap_irq_handler(). * @handle_mask_sync: Callback used to handle IRQ mask syncs. 
The index will be * in the range [0, num_regs) - * @set_type_virt: Driver specific callback to extend regmap_irq_set_type() - * and configure virt regs. Deprecated, use @set_type_config - * callback and config registers instead. * @set_type_config: Callback used for configuring irq types. * @get_irq_reg: Callback for mapping (base register, index) pairs to register * addresses. The base register will be one of @status_base, @@ -1630,7 +1622,6 @@ struct regmap_irq_chip { unsigned int ack_base; unsigned int wake_base; unsigned int type_base; - unsigned int *virt_reg_base; const unsigned int *config_base; unsigned int irq_reg_stride; unsigned int init_ack_masked:1; @@ -1652,7 +1643,6 @@ struct regmap_irq_chip { int num_irqs; int num_type_reg; - int num_virt_regs; int num_config_bases; int num_config_regs; @@ -1660,8 +1650,6 @@ struct regmap_irq_chip { int (*handle_post_irq)(void *irq_drv_data); int (*handle_mask_sync)(int index, unsigned int mask_buf_def, unsigned int mask_buf, void *irq_drv_data); - int (*set_type_virt)(unsigned int **buf, unsigned int type, - unsigned long hwirq, int reg); int (*set_type_config)(unsigned int **buf, unsigned int type, const struct regmap_irq *irq_data, int idx, void *irq_drv_data); -- cgit v1.2.3 From 7a3cc29136960c45eff362a7304dd4f6eaf34cdd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Mar 2023 18:37:51 +0100 Subject: rcu: Remove RCU_NONIDLE() Since there are now exactly _zero_ users of RCU_NONIDLE(), make it go away before someone else decides to (ab)use it. [ paulmck: Remove extraneous whitespace. ] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Acked-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- .../RCU/Design/Requirements/Requirements.rst | 36 +--------------------- Documentation/RCU/whatisRCU.rst | 1 - include/linux/rcupdate.h | 25 --------------- 3 files changed, 1 insertion(+), 61 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index 49387d823619..f3b605285a87 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -2071,41 +2071,7 @@ call. Because RCU avoids interrupting idle CPUs, it is illegal to execute an RCU read-side critical section on an idle CPU. (Kernels built with -``CONFIG_PROVE_RCU=y`` will splat if you try it.) The RCU_NONIDLE() -macro and ``_rcuidle`` event tracing is provided to work around this -restriction. In addition, rcu_is_watching() may be used to test -whether or not it is currently legal to run RCU read-side critical -sections on this CPU. I learned of the need for diagnostics on the one -hand and RCU_NONIDLE() on the other while inspecting idle-loop code. -Steven Rostedt supplied ``_rcuidle`` event tracing, which is used quite -heavily in the idle loop. However, there are some restrictions on the -code placed within RCU_NONIDLE(): - -#. Blocking is prohibited. In practice, this is not a serious - restriction given that idle tasks are prohibited from blocking to - begin with. -#. Although nesting RCU_NONIDLE() is permitted, they cannot nest - indefinitely deeply. However, given that they can be nested on the - order of a million deep, even on 32-bit systems, this should not be a - serious restriction. This nesting limit would probably be reached - long after the compiler OOMed or the stack overflowed. -#. Any code path that enters RCU_NONIDLE() must sequence out of that - same RCU_NONIDLE(). 
For example, the following is grossly - illegal: - - :: - - 1 RCU_NONIDLE({ - 2 do_something(); - 3 goto bad_idea; /* BUG!!! */ - 4 do_something_else();}); - 5 bad_idea: - - - It is just as illegal to transfer control into the middle of - RCU_NONIDLE()'s argument. Yes, in theory, you could transfer in - as long as you also transferred out, but in practice you could also - expect to get sharply worded review comments. +``CONFIG_PROVE_RCU=y`` will splat if you try it.) It is similarly socially unacceptable to interrupt an ``nohz_full`` CPU running in userspace. RCU must therefore track ``nohz_full`` userspace diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index 8eddef28d3a1..e488c8e557a9 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -1117,7 +1117,6 @@ All: lockdep-checked RCU utility APIs:: RCU_LOCKDEP_WARN rcu_sleep_check - RCU_NONIDLE All: Unchecked RCU-protected pointer access:: diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dcd2cf1e8326..aae31a3e28dd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -156,31 +156,6 @@ static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; } static inline void rcu_nocb_flush_deferred_wakeup(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ -/** - * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers - * @a: Code that RCU needs to pay attention to. - * - * RCU read-side critical sections are forbidden in the inner idle loop, - * that is, between the ct_idle_enter() and the ct_idle_exit() -- RCU - * will happily ignore any such read-side critical sections. However, - * things like powertop need tracepoints in the inner idle loop. - * - * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) - * will tell RCU that it needs to pay attention, invoke its argument - * (in this example, calling the do_something_with_RCU() function), - * and then tell RCU to go back to ignoring this CPU. It is permissible - * to nest RCU_NONIDLE() wrappers, but not indefinitely (but the limit is - * on the order of a million or so, even on 32-bit systems). It is - * not legal to block within RCU_NONIDLE(), nor is it permissible to - * transfer control either into or out of RCU_NONIDLE()'s statement. - */ -#define RCU_NONIDLE(a) \ - do { \ - ct_irq_enter_irqson(); \ - do { a; } while (0); \ - ct_irq_exit_irqson(); \ - } while (0) - /* * Note a quasi-voluntary context switch for RCU-tasks's benefit. * This is a macro rather than an inline function to avoid #include hell. -- cgit v1.2.3 From f05cbadce7e409b38acdf21f0a05d4420afa1b11 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 11 May 2023 10:13:39 +0100 Subject: regmap-irq: Remove type registers No remaining users, these have been replaced by config registers. 
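For drivers that still carried the removed fields, the replacement shape looks roughly like the sketch below. The "foo" chip, its register addresses and IRQ layout are made up for illustration, and the per-IRQ trigger bit values are still described in each regmap_irq's .type member as before; this is not code from this series.

  #include <linux/kernel.h>
  #include <linux/bits.h>
  #include <linux/regmap.h>

  /* Hypothetical chip: one status/mask/ack bank and one type-config register. */
  static const unsigned int foo_config_base[] = { 0x14 };

  static const struct regmap_irq foo_irqs[] = {
          REGMAP_IRQ_REG(0, 0, BIT(0)),
  };

  static const struct regmap_irq_chip foo_irq_chip = {
          .name             = "foo",
          .status_base      = 0x10,
          .mask_base        = 0x11,
          .ack_base         = 0x12,
          .irqs             = foo_irqs,
          .num_irqs         = ARRAY_SIZE(foo_irqs),
          .num_regs         = 1,
          /* Replaces the removed type_base/num_type_reg/set_type_virt: */
          .config_base      = foo_config_base,
          .num_config_bases = ARRAY_SIZE(foo_config_base),
          .num_config_regs  = 1,
          .set_type_config  = regmap_irq_set_type_config_simple,
  };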
Signed-off-by: Aidan MacDonald chip->type_in_mask) { - for (i = 0; i < d->chip->num_type_reg; i++) { - if (!d->type_buf_def[i]) - continue; - reg = d->get_irq_reg(d, d->chip->type_base, i); - ret = regmap_update_bits(d->map, reg, - d->type_buf_def[i], d->type_buf[i]); - if (ret != 0) - dev_err(d->map->dev, "Failed to sync type in %x\n", - reg); - } - } - for (i = 0; i < d->chip->num_config_bases; i++) { for (j = 0; j < d->chip->num_config_regs; j++) { reg = d->get_irq_reg(d, d->chip->config_base[i], j); @@ -273,36 +259,11 @@ static int regmap_irq_set_type(struct irq_data *data, unsigned int type) reg = t->type_reg_offset / map->reg_stride; - if (t->type_reg_mask) - d->type_buf[reg] &= ~t->type_reg_mask; - else - d->type_buf[reg] &= ~(t->type_falling_val | - t->type_rising_val | - t->type_level_low_val | - t->type_level_high_val); - switch (type) { - case IRQ_TYPE_EDGE_FALLING: - d->type_buf[reg] |= t->type_falling_val; - break; - - case IRQ_TYPE_EDGE_RISING: - d->type_buf[reg] |= t->type_rising_val; - break; - - case IRQ_TYPE_EDGE_BOTH: - d->type_buf[reg] |= (t->type_falling_val | - t->type_rising_val); - break; - - case IRQ_TYPE_LEVEL_HIGH: - d->type_buf[reg] |= t->type_level_high_val; - break; - - case IRQ_TYPE_LEVEL_LOW: - d->type_buf[reg] |= t->type_level_low_val; - break; - default: - return -EINVAL; + if (d->chip->type_in_mask) { + ret = regmap_irq_set_type_config_simple(&d->type_buf, type, + irq_data, reg, d->chip->irq_drv_data); + if (ret) + return ret; } if (d->chip->set_type_config) { @@ -707,8 +668,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, struct regmap_irq_chip_data *d; int i; int ret = -ENOMEM; - int num_type_reg; - int num_regs; u32 reg; if (chip->num_regs <= 0) @@ -733,9 +692,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, return -EINVAL; } - if (chip->num_type_reg) - dev_warn(map->dev, "type registers are deprecated; use config registers instead"); - if (irq_base) { irq_base = irq_alloc_descs(irq_base, 0, chip->num_irqs, 0); if (irq_base < 0) { @@ -780,21 +736,13 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, goto err_alloc; } - /* - * Use num_config_regs if defined, otherwise fall back to num_type_reg - * to maintain backward compatibility. - */ - num_type_reg = chip->num_config_regs ? chip->num_config_regs - : chip->num_type_reg; - num_regs = chip->type_in_mask ? chip->num_regs : num_type_reg; - if (num_regs) { - d->type_buf_def = kcalloc(num_regs, + if (chip->type_in_mask) { + d->type_buf_def = kcalloc(chip->num_regs, sizeof(*d->type_buf_def), GFP_KERNEL); if (!d->type_buf_def) goto err_alloc; - d->type_buf = kcalloc(num_regs, sizeof(*d->type_buf), - GFP_KERNEL); + d->type_buf = kcalloc(chip->num_regs, sizeof(*d->type_buf), GFP_KERNEL); if (!d->type_buf) goto err_alloc; } @@ -970,20 +918,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, } } - if (chip->num_type_reg && !chip->type_in_mask) { - for (i = 0; i < chip->num_type_reg; ++i) { - reg = d->get_irq_reg(d, d->chip->type_base, i); - - ret = regmap_read(map, reg, &d->type_buf_def[i]); - - if (ret) { - dev_err(map->dev, "Failed to get type defaults at 0x%x: %d\n", - reg, ret); - goto err_alloc; - } - } - } - if (irq_base) d->domain = irq_domain_create_legacy(fwnode, chip->num_irqs, irq_base, 0, diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 2ad0e3d77b95..0b4b9eca480d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1542,8 +1542,6 @@ struct regmap_irq_chip_data; * @ack_base: Base ack address. 
If zero then the chip is clear on read. * Using zero value is possible with @use_ack bit. * @wake_base: Base address for wake enables. If zero unsupported. - * @type_base: Base address for irq type. If zero unsupported. Deprecated, - * use @config_base instead. * @config_base: Base address for IRQ type config regs. If null unsupported. * @irq_reg_stride: Stride to use for chips where registers are not contiguous. * @init_ack_masked: Ack all masked interrupts once during initalization. @@ -1581,9 +1579,6 @@ struct regmap_irq_chip_data; * @irqs: Descriptors for individual IRQs. Interrupt numbers are * assigned based on the index in the array of the interrupt. * @num_irqs: Number of descriptors. - * - * @num_type_reg: Number of type registers. Deprecated, use config registers - * instead. * @num_config_bases: Number of config base registers. * @num_config_regs: Number of config registers for each config base register. * @@ -1621,7 +1616,6 @@ struct regmap_irq_chip { unsigned int unmask_base; unsigned int ack_base; unsigned int wake_base; - unsigned int type_base; const unsigned int *config_base; unsigned int irq_reg_stride; unsigned int init_ack_masked:1; @@ -1642,7 +1636,6 @@ struct regmap_irq_chip { const struct regmap_irq *irqs; int num_irqs; - int num_type_reg; int num_config_bases; int num_config_regs; -- cgit v1.2.3 From 72cc0f523babca3886421721aa662c7d352a6d32 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Thu, 11 May 2023 10:13:40 +0100 Subject: regmap-irq: Remove support for not_fixed_stride No remaining users, use a custom .get_irq_reg() callback instead. Signed-off-by: Aidan MacDonald offset[i]; unsigned int index = offset / map->reg_stride; - if (chip->not_fixed_stride) - ret = regmap_read(map, - chip->status_base + offset, - &data->status_buf[b]); - else - ret = regmap_read(map, - chip->status_base + offset, - &data->status_buf[index]); - + ret = regmap_read(map, chip->status_base + offset, + &data->status_buf[index]); if (ret) break; } @@ -391,17 +384,7 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) * sake of simplicity. and add bulk reads only if needed */ for (i = 0; i < chip->num_main_regs; i++) { - /* - * For not_fixed_stride, don't use ->get_irq_reg(). - * It would produce an incorrect result. - */ - if (data->chip->not_fixed_stride) - reg = chip->main_status + - i * map->reg_stride * data->irq_reg_stride; - else - reg = data->get_irq_reg(data, - chip->main_status, i); - + reg = data->get_irq_reg(data, chip->main_status, i); ret = regmap_read(map, reg, &data->main_status_buf[i]); if (ret) { dev_err(map->dev, @@ -567,20 +550,8 @@ static const struct irq_domain_ops regmap_domain_ops = { unsigned int regmap_irq_get_irq_reg_linear(struct regmap_irq_chip_data *data, unsigned int base, int index) { - const struct regmap_irq_chip *chip = data->chip; struct regmap *map = data->map; - /* - * FIXME: This is for backward compatibility and should be removed - * when not_fixed_stride is dropped (it's only used by qcom-pm8008). 
- */ - if (chip->not_fixed_stride && chip->sub_reg_offsets) { - struct regmap_irq_sub_irq_map *subreg; - - subreg = &chip->sub_reg_offsets[0]; - return base + subreg->offset[0]; - } - return base + index * map->reg_stride * data->irq_reg_stride; } EXPORT_SYMBOL_GPL(regmap_irq_get_irq_reg_linear); @@ -684,14 +655,6 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, return -EINVAL; } - if (chip->not_fixed_stride) { - dev_warn(map->dev, "not_fixed_stride is deprecated; use ->get_irq_reg() instead"); - - for (i = 0; i < chip->num_regs; i++) - if (chip->sub_reg_offsets[i].num_regs != 1) - return -EINVAL; - } - if (irq_base) { irq_base = irq_alloc_descs(irq_base, 0, chip->num_irqs, 0); if (irq_base < 0) { diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 0b4b9eca480d..8fc0b3ebce44 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1528,9 +1528,6 @@ struct regmap_irq_chip_data; * status_base. Should contain num_regs arrays. * Can be provided for chips with more complex mapping than * 1.st bit to 1.st sub-reg, 2.nd bit to 2.nd sub-reg, ... - * When used with not_fixed_stride, each one-element array - * member contains offset calculated as address from each - * peripheral to first peripheral. * @num_main_regs: Number of 'main status' irq registers for chips which have * main_status set. * @@ -1567,11 +1564,6 @@ struct regmap_irq_chip_data; * registers before unmasking interrupts to clear any bits * set when they were masked. * @runtime_pm: Hold a runtime PM lock on the device when accessing it. - * @not_fixed_stride: Used when chip peripherals are not laid out with fixed - * stride. Must be used with sub_reg_offsets containing the - * offsets to each peripheral. Deprecated; the same thing - * can be accomplished with a @get_irq_reg callback, without - * the need for a @sub_reg_offsets table. * @no_status: No status register: all interrupts assumed generated by device. * * @num_regs: Number of registers in each control bank. @@ -1628,7 +1620,6 @@ struct regmap_irq_chip { unsigned int type_in_mask:1; unsigned int clear_on_unmask:1; unsigned int runtime_pm:1; - unsigned int not_fixed_stride:1; unsigned int no_status:1; int num_regs; -- cgit v1.2.3 From 1f7d5520719dd1fed1a2947679f6cc26a55f1e6b Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Fri, 28 Apr 2023 19:30:55 +0530 Subject: USB: Extend pci resume function to handle PM events Currently, the pci_resume method has only a flag indicating whether the system is resuming from hibernation. In order to handle all PM events like AUTO_RESUME (runtime resume from device in D3), RESUME (system resume from s2idle, S3 or S4 states) etc change the pci_resume method to handle all PM events. 
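A rough sketch of the new callback shape for an HCD driver follows; the foo_ prefix is a placeholder and not a driver touched by this patch. The hibernation case is derived from the message instead of being passed as a bool, and runtime resume can now be distinguished as well.

  #include <linux/pm.h>
  #include <linux/usb/hcd.h>

  static int foo_pci_resume(struct usb_hcd *hcd, pm_message_t msg)
  {
          if (msg.event == PM_EVENT_RESTORE) {
                  /* Resume from hibernation: the old 'hibernated == true' case. */
          } else if (PMSG_IS_AUTO(msg)) {
                  /* Runtime resume from D3, previously indistinguishable here. */
          }

          /* ... reinitialize the controller as before ... */
          return 0;
  }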
Signed-off-by: Basavaraj Natikar Acked-by: Alan Stern Acked-by: Mathias Nyman Link: https://lore.kernel.org/r/20230428140056.1318981-2-Basavaraj.Natikar@amd.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd-pci.c | 24 +++++++++++++----------- drivers/usb/host/ehci-pci.c | 3 ++- drivers/usb/host/ohci-pci.c | 8 +++++++- drivers/usb/host/uhci-pci.c | 7 ++++--- drivers/usb/host/xhci-histb.c | 2 +- drivers/usb/host/xhci-pci.c | 4 ++-- drivers/usb/host/xhci-plat.c | 4 ++-- drivers/usb/host/xhci-tegra.c | 2 +- drivers/usb/host/xhci.c | 3 ++- drivers/usb/host/xhci.h | 2 +- include/linux/usb/hcd.h | 2 +- 11 files changed, 36 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index ab2f3737764e..990280688b25 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -415,12 +415,15 @@ static int check_root_hub_suspended(struct device *dev) return 0; } -static int suspend_common(struct device *dev, bool do_wakeup) +static int suspend_common(struct device *dev, pm_message_t msg) { struct pci_dev *pci_dev = to_pci_dev(dev); struct usb_hcd *hcd = pci_get_drvdata(pci_dev); + bool do_wakeup; int retval; + do_wakeup = PMSG_IS_AUTO(msg) ? true : device_may_wakeup(dev); + /* Root hub suspend should have stopped all downstream traffic, * and all bus master traffic. And done so for both the interface * and the stub usb_device (which we check here). But maybe it @@ -447,7 +450,7 @@ static int suspend_common(struct device *dev, bool do_wakeup) (retval == 0 && do_wakeup && hcd->shared_hcd && HCD_WAKEUP_PENDING(hcd->shared_hcd))) { if (hcd->driver->pci_resume) - hcd->driver->pci_resume(hcd, false); + hcd->driver->pci_resume(hcd, msg); retval = -EBUSY; } if (retval) @@ -470,7 +473,7 @@ static int suspend_common(struct device *dev, bool do_wakeup) return retval; } -static int resume_common(struct device *dev, int event) +static int resume_common(struct device *dev, pm_message_t msg) { struct pci_dev *pci_dev = to_pci_dev(dev); struct usb_hcd *hcd = pci_get_drvdata(pci_dev); @@ -498,12 +501,11 @@ static int resume_common(struct device *dev, int event) * No locking is needed because PCI controller drivers do not * get unbound during system resume. 
*/ - if (pci_dev->class == CL_EHCI && event != PM_EVENT_AUTO_RESUME) + if (pci_dev->class == CL_EHCI && msg.event != PM_EVENT_AUTO_RESUME) for_each_companion(pci_dev, hcd, ehci_wait_for_companions); - retval = hcd->driver->pci_resume(hcd, - event == PM_EVENT_RESTORE); + retval = hcd->driver->pci_resume(hcd, msg); if (retval) { dev_err(dev, "PCI post-resume error %d!\n", retval); usb_hc_died(hcd); @@ -516,7 +518,7 @@ static int resume_common(struct device *dev, int event) static int hcd_pci_suspend(struct device *dev) { - return suspend_common(dev, device_may_wakeup(dev)); + return suspend_common(dev, PMSG_SUSPEND); } static int hcd_pci_suspend_noirq(struct device *dev) @@ -577,12 +579,12 @@ static int hcd_pci_resume_noirq(struct device *dev) static int hcd_pci_resume(struct device *dev) { - return resume_common(dev, PM_EVENT_RESUME); + return resume_common(dev, PMSG_RESUME); } static int hcd_pci_restore(struct device *dev) { - return resume_common(dev, PM_EVENT_RESTORE); + return resume_common(dev, PMSG_RESTORE); } #else @@ -600,7 +602,7 @@ static int hcd_pci_runtime_suspend(struct device *dev) { int retval; - retval = suspend_common(dev, true); + retval = suspend_common(dev, PMSG_AUTO_SUSPEND); if (retval == 0) powermac_set_asic(to_pci_dev(dev), 0); dev_dbg(dev, "hcd_pci_runtime_suspend: %d\n", retval); @@ -612,7 +614,7 @@ static int hcd_pci_runtime_resume(struct device *dev) int retval; powermac_set_asic(to_pci_dev(dev), 1); - retval = resume_common(dev, PM_EVENT_AUTO_RESUME); + retval = resume_common(dev, PMSG_AUTO_RESUME); dev_dbg(dev, "hcd_pci_runtime_resume: %d\n", retval); return retval; } diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 4b148fe5e43b..889dc4426271 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -354,10 +354,11 @@ done: * Also they depend on separate root hub suspend/resume. 
*/ -static int ehci_pci_resume(struct usb_hcd *hcd, bool hibernated) +static int ehci_pci_resume(struct usb_hcd *hcd, pm_message_t msg) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + bool hibernated = (msg.event == PM_EVENT_RESTORE); if (ehci_resume(hcd, hibernated) != 0) (void) ehci_pci_reinit(ehci, pdev); diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c index d7b4f40f9ff4..900ea0d368e0 100644 --- a/drivers/usb/host/ohci-pci.c +++ b/drivers/usb/host/ohci-pci.c @@ -301,6 +301,12 @@ static struct pci_driver ohci_pci_driver = { #endif }; +#ifdef CONFIG_PM +static int ohci_pci_resume(struct usb_hcd *hcd, pm_message_t msg) +{ + return ohci_resume(hcd, msg.event == PM_EVENT_RESTORE); +} +#endif static int __init ohci_pci_init(void) { if (usb_disabled()) @@ -311,7 +317,7 @@ static int __init ohci_pci_init(void) #ifdef CONFIG_PM /* Entries for the PCI suspend/resume callbacks are special */ ohci_pci_hc_driver.pci_suspend = ohci_suspend; - ohci_pci_hc_driver.pci_resume = ohci_resume; + ohci_pci_hc_driver.pci_resume = ohci_pci_resume; #endif return pci_register_driver(&ohci_pci_driver); diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c index 3592f757fe05..5df4a1832b09 100644 --- a/drivers/usb/host/uhci-pci.c +++ b/drivers/usb/host/uhci-pci.c @@ -167,7 +167,7 @@ static void uhci_shutdown(struct pci_dev *pdev) #ifdef CONFIG_PM -static int uhci_pci_resume(struct usb_hcd *hcd, bool hibernated); +static int uhci_pci_resume(struct usb_hcd *hcd, pm_message_t state); static int uhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) { @@ -202,14 +202,15 @@ done_okay: /* Check for race with a wakeup request */ if (do_wakeup && HCD_WAKEUP_PENDING(hcd)) { - uhci_pci_resume(hcd, false); + uhci_pci_resume(hcd, PMSG_SUSPEND); rc = -EBUSY; } return rc; } -static int uhci_pci_resume(struct usb_hcd *hcd, bool hibernated) +static int uhci_pci_resume(struct usb_hcd *hcd, pm_message_t msg) { + bool hibernated = (msg.event == PM_EVENT_RESTORE); struct uhci_hcd *uhci = hcd_to_uhci(hcd); dev_dbg(uhci_dev(uhci), "%s\n", __func__); diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c index 08369857686e..91ce97821de5 100644 --- a/drivers/usb/host/xhci-histb.c +++ b/drivers/usb/host/xhci-histb.c @@ -367,7 +367,7 @@ static int __maybe_unused xhci_histb_resume(struct device *dev) if (!device_may_wakeup(dev)) xhci_histb_host_enable(histb); - return xhci_resume(xhci, 0); + return xhci_resume(xhci, PMSG_RESUME); } static const struct dev_pm_ops xhci_histb_pm_ops = { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index ddb79f23fb3b..e834c77e188e 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -824,7 +824,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) return ret; } -static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) +static int xhci_pci_resume(struct usb_hcd *hcd, pm_message_t msg) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct pci_dev *pdev = to_pci_dev(hcd->self.controller); @@ -859,7 +859,7 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) if (xhci->quirks & XHCI_PME_STUCK_QUIRK) xhci_pme_quirk(hcd); - retval = xhci_resume(xhci, hibernated); + retval = xhci_resume(xhci, msg); return retval; } diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index b0c8e8efc43b..f36633fa8362 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -478,7 +478,7 @@ static 
int __maybe_unused xhci_plat_resume(struct device *dev) if (ret) return ret; - ret = xhci_resume(xhci, 0); + ret = xhci_resume(xhci, PMSG_RESUME); if (ret) return ret; @@ -507,7 +507,7 @@ static int __maybe_unused xhci_plat_runtime_resume(struct device *dev) struct usb_hcd *hcd = dev_get_drvdata(dev); struct xhci_hcd *xhci = hcd_to_xhci(hcd); - return xhci_resume(xhci, 0); + return xhci_resume(xhci, PMSG_AUTO_RESUME); } const struct dev_pm_ops xhci_plat_pm_ops = { diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index c75d93244143..8a9c7deb7686 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -2272,7 +2272,7 @@ static int tegra_xusb_exit_elpg(struct tegra_xusb *tegra, bool runtime) if (wakeup) tegra_xhci_disable_phy_sleepwalk(tegra); - err = xhci_resume(xhci, 0); + err = xhci_resume(xhci, runtime ? PMSG_AUTO_RESUME : PMSG_RESUME); if (err < 0) { dev_err(tegra->dev, "failed to resume XHCI: %d\n", err); goto disable_phy; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 78790dc13c5f..65d54c8a2492 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -960,8 +960,9 @@ EXPORT_SYMBOL_GPL(xhci_suspend); * This is called when the machine transition from S3/S4 mode. * */ -int xhci_resume(struct xhci_hcd *xhci, bool hibernated) +int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg) { + bool hibernated = (msg.event == PM_EVENT_RESTORE); u32 command, temp = 0; struct usb_hcd *hcd = xhci_to_hcd(xhci); int retval = 0; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 08d721921b7b..047b290404b4 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2140,7 +2140,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); int xhci_ext_cap_init(struct xhci_hcd *xhci); int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup); -int xhci_resume(struct xhci_hcd *xhci, bool hibernated); +int xhci_resume(struct xhci_hcd *xhci, pm_message_t msg); irqreturn_t xhci_irq(struct usb_hcd *hcd); irqreturn_t xhci_msi_irq(int irq, void *hcd); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 094c77eaf455..30ab8994f0c1 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -267,7 +267,7 @@ struct hc_driver { int (*pci_suspend)(struct usb_hcd *hcd, bool do_wakeup); /* called after entering D0 (etc), before resuming the hub */ - int (*pci_resume)(struct usb_hcd *hcd, bool hibernated); + int (*pci_resume)(struct usb_hcd *hcd, pm_message_t state); /* called just before hibernate final D3 state, allows host to poweroff parts */ int (*pci_poweroff_late)(struct usb_hcd *hcd, bool do_wakeup); -- cgit v1.2.3 From b245aa0cc583c3efa0335c50f309a32e5fec8ab7 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 11 May 2023 15:10:24 +0300 Subject: serial: 8250: Change dl_read/write to handle value as u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Divisor latch read/write functions currently handle the value is int. As the value is related to HW context, u32 makes much more sense than a signed type. While at it, name the parameters in the callback signature. 
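A minimal sketch of a divisor latch callback pair using the new u32 prototypes, modelled on the Au1x00/RT288x latch at offset 0x28; the foo_ prefix is a placeholder.

  #include <linux/io.h>
  #include <linux/serial_8250.h>

  /* Memory-mapped divisor latch at a single 32-bit register (offset 0x28). */
  static u32 foo_serial_dl_read(struct uart_8250_port *up)
  {
          return __raw_readl(up->port.membase + 0x28);
  }

  static void foo_serial_dl_write(struct uart_8250_port *up, u32 value)
  {
          __raw_writel(value, up->port.membase + 0x28);
  }

Port setup then points up->dl_read/up->dl_write at these instead of the default 16550 DLL/DLM accessors.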
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230511121029.13128-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 4 ++-- drivers/tty/serial/8250/8250_em.c | 4 ++-- drivers/tty/serial/8250/8250_port.c | 10 +++++----- drivers/tty/serial/8250/8250_pxa.c | 2 +- drivers/tty/serial/8250/8250_uniphier.c | 4 ++-- include/linux/serial_8250.h | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 1e8fe44a7099..5418708f4631 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -167,12 +167,12 @@ static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); -static inline int serial_dl_read(struct uart_8250_port *up) +static inline u32 serial_dl_read(struct uart_8250_port *up) { return up->dl_read(up); } -static inline void serial_dl_write(struct uart_8250_port *up, int value) +static inline void serial_dl_write(struct uart_8250_port *up, u32 value) { up->dl_write(up, value); } diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c index 25a9ecf26be6..ef5019e944ea 100644 --- a/drivers/tty/serial/8250/8250_em.c +++ b/drivers/tty/serial/8250/8250_em.c @@ -139,12 +139,12 @@ static void serial8250_em_serial_out(struct uart_port *p, int offset, int value) } } -static int serial8250_em_serial_dl_read(struct uart_8250_port *up) +static u32 serial8250_em_serial_dl_read(struct uart_8250_port *up) { return serial_in(up, UART_DLL_EM) | serial_in(up, UART_DLM_EM) << 8; } -static void serial8250_em_serial_dl_write(struct uart_8250_port *up, int value) +static void serial8250_em_serial_dl_write(struct uart_8250_port *up, u32 value) { serial_out(up, UART_DLL_EM, value & 0xff); serial_out(up, UART_DLM_EM, value >> 8 & 0xff); diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index fe8d79c4ae95..344bd447639b 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -325,7 +325,7 @@ static const struct serial8250_config uart_config[] = { }; /* Uart divisor latch read */ -static int default_serial_dl_read(struct uart_8250_port *up) +static u32 default_serial_dl_read(struct uart_8250_port *up) { /* Assign these in pieces to truncate any bits above 7. 
*/ unsigned char dll = serial_in(up, UART_DLL); @@ -335,7 +335,7 @@ static int default_serial_dl_read(struct uart_8250_port *up) } /* Uart divisor latch write */ -static void default_serial_dl_write(struct uart_8250_port *up, int value) +static void default_serial_dl_write(struct uart_8250_port *up, u32 value) { serial_out(up, UART_DLL, value & 0xff); serial_out(up, UART_DLM, value >> 8 & 0xff); @@ -389,12 +389,12 @@ void au_serial_out(struct uart_port *p, int offset, int value) } /* Au1x00 haven't got a standard divisor latch */ -static int au_serial_dl_read(struct uart_8250_port *up) +static u32 au_serial_dl_read(struct uart_8250_port *up) { return __raw_readl(up->port.membase + 0x28); } -static void au_serial_dl_write(struct uart_8250_port *up, int value) +static void au_serial_dl_write(struct uart_8250_port *up, u32 value) { __raw_writel(value, up->port.membase + 0x28); } @@ -847,7 +847,7 @@ static void disable_rsa(struct uart_8250_port *up) static int size_fifo(struct uart_8250_port *up) { unsigned char old_fcr, old_mcr, old_lcr; - unsigned short old_dl; + u32 old_dl; int count; old_lcr = serial_in(up, UART_LCR); diff --git a/drivers/tty/serial/8250/8250_pxa.c b/drivers/tty/serial/8250/8250_pxa.c index 795e55142d4c..28b341f602c6 100644 --- a/drivers/tty/serial/8250/8250_pxa.c +++ b/drivers/tty/serial/8250/8250_pxa.c @@ -60,7 +60,7 @@ static const struct of_device_id serial_pxa_dt_ids[] = { MODULE_DEVICE_TABLE(of, serial_pxa_dt_ids); /* Uart divisor latch write */ -static void serial_pxa_dl_write(struct uart_8250_port *up, int value) +static void serial_pxa_dl_write(struct uart_8250_port *up, u32 value) { unsigned int dll; diff --git a/drivers/tty/serial/8250/8250_uniphier.c b/drivers/tty/serial/8250/8250_uniphier.c index a2978abab0db..a405155264b1 100644 --- a/drivers/tty/serial/8250/8250_uniphier.c +++ b/drivers/tty/serial/8250/8250_uniphier.c @@ -145,12 +145,12 @@ static void uniphier_serial_out(struct uart_port *p, int offset, int value) * The divisor latch register exists at different address. * Override dl_read/write callbacks. */ -static int uniphier_serial_dl_read(struct uart_8250_port *up) +static u32 uniphier_serial_dl_read(struct uart_8250_port *up) { return readl(up->port.membase + UNIPHIER_UART_DLR); } -static void uniphier_serial_dl_write(struct uart_8250_port *up, int value) +static void uniphier_serial_dl_write(struct uart_8250_port *up, u32 value) { writel(value, up->port.membase + UNIPHIER_UART_DLR); } diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 6f78f302d272..7b5d558e4e0c 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -129,8 +129,8 @@ struct uart_8250_port { const struct uart_8250_ops *ops; /* 8250 specific callbacks */ - int (*dl_read)(struct uart_8250_port *); - void (*dl_write)(struct uart_8250_port *, int); + u32 (*dl_read)(struct uart_8250_port *up); + void (*dl_write)(struct uart_8250_port *up, u32 value); struct uart_8250_em485 *em485; void (*rs485_start_tx)(struct uart_8250_port *); -- cgit v1.2.3 From 98658ae8f3925d05c2d0c67749ff6cf47b9077a5 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 11 May 2023 15:10:25 +0300 Subject: serial: 8250: Document uart_8250_port's ->dl_read/write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for the struct uart_8250_port divisor latch function pointers. Documentation is in kernel doc format but don't enable kernel doc yet as many other fields remain undocumented. 
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230511121029.13128-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_8250.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 7b5d558e4e0c..d64e7bbe1f2f 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -90,8 +90,17 @@ struct uart_8250_em485 { * their own 8250 ports without registering their own * platform device. Using these will make your driver * dependent on the 8250 driver. + * + * @dl_read: ``u32 ()(struct uart_8250_port *port)`` + * + * UART divisor latch read. + * + * @dl_write: ``void ()(struct uart_8250_port *port, u32 value)`` + * + * Write @value into UART divisor latch. + * + * Locking: Caller holds port's lock. */ - struct uart_8250_port { struct uart_port port; struct timer_list timer; /* "no irq" timer */ -- cgit v1.2.3 From 30c61f53fdf23e8a396dca8378615b4900edff6f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 11 May 2023 15:10:26 +0300 Subject: serial: 8250: Add dl_read/write, bugs and mapsize into plat_serial8250_port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add mapsize, bugs, and divisor latch read/write functions (->dl_read/write()) into plat_serial8250_port to carry the setup necessary for RT288x/Au1xxx devices over to uart port. Document the added members with kerneldoc style but do not enable kerneldoc yet as there are many fields which remain undocumented. While at it, convert .bugs in struct uart_8250_port to u16 to match it with the type used in struct plat_serial8250_port. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230511121029.13128-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 4 ++++ include/linux/serial_8250.h | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 560a3592ec05..4434c3256a92 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -850,12 +850,16 @@ static int serial8250_probe(struct platform_device *dev) uart.port.iotype = p->iotype; uart.port.flags = p->flags; uart.port.mapbase = p->mapbase; + uart.port.mapsize = p->mapsize; uart.port.hub6 = p->hub6; uart.port.has_sysrq = p->has_sysrq; uart.port.private_data = p->private_data; uart.port.type = p->type; + uart.bugs = p->bugs; uart.port.serial_in = p->serial_in; uart.port.serial_out = p->serial_out; + uart.dl_read = p->dl_read; + uart.dl_write = p->dl_write; uart.port.handle_irq = p->handle_irq; uart.port.handle_break = p->handle_break; uart.port.set_termios = p->set_termios; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index d64e7bbe1f2f..42fc8f64f48e 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -11,13 +11,29 @@ #include #include +struct uart_8250_port; + /* * This is the platform device platform_data structure + * + * @mapsize: Port size for ioremap() + * @bugs: Port bugs + * + * @dl_read: ``u32 ()(struct uart_8250_port *up)`` + * + * UART divisor latch read. + * + * @dl_write: ``void ()(struct uart_8250_port *up, u32 value)`` + * + * Write @value into UART divisor latch. + * + * Locking: Caller holds port's lock. 
*/ struct plat_serial8250_port { unsigned long iobase; /* io base address */ void __iomem *membase; /* ioremap cookie or NULL */ resource_size_t mapbase; /* resource base */ + resource_size_t mapsize; unsigned int uartclk; /* UART clock rate */ unsigned int irq; /* interrupt number */ unsigned long irqflags; /* request_irq flags */ @@ -28,8 +44,11 @@ struct plat_serial8250_port { unsigned char has_sysrq; /* supports magic SysRq */ unsigned int type; /* If UPF_FIXED_TYPE */ upf_t flags; /* UPF_* flags */ + u16 bugs; /* port bugs */ unsigned int (*serial_in)(struct uart_port *, int); void (*serial_out)(struct uart_port *, int, int); + u32 (*dl_read)(struct uart_8250_port *up); + void (*dl_write)(struct uart_8250_port *up, u32 value); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); @@ -106,7 +125,7 @@ struct uart_8250_port { struct timer_list timer; /* "no irq" timer */ struct list_head list; /* ports on this IRQ */ u32 capabilities; /* port capabilities */ - unsigned short bugs; /* port bugs */ + u16 bugs; /* port bugs */ bool fifo_bug; /* min RX trigger if enabled */ unsigned int tx_loadsz; /* transmit fifo load size */ unsigned char acr; -- cgit v1.2.3 From b334214ea08d941af376fec853621d856b12bc81 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 11 May 2023 15:10:27 +0300 Subject: serial: 8250: RT288x/Au1xxx code away from core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A non-trivial amount of RT288x/Au1xxx code is encapsulated into ifdeffery in 8250_port / 8250_early and some if UPIO_AU blocks. Create a separate file from them. Also handle errors properly in the cases where RT288x/Au1xxx code is not configured. It seems that 0x1000 mapsize is likely overkill but I've kept it the same as previously (the value was shrunk to that value in commit b2b13cdfd05e ("SERIAL 8250: Fixes for Alchemy UARTs.")). Seemingly, the driver only needs to access register at 0x28 for the divisor latch. The Kconfig side is a bit tricky. As SERIAL_8250_RT288X is bool it can only be =y. It is possible to have SERIAL_8250=m + SERIAL_8250_RT288X=y which required altering when 8250/ is included or the rt288x would not be built. 
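A sketch of how board code is expected to consume the new helper, mirroring the Alchemy platform change below; the foo_ names are placeholders. When SERIAL_8250_RT288X is not built, the inline stub returns -ENODEV and the board code can give up cleanly.

  #include <linux/serial_8250.h>

  static int foo_setup_uart(struct plat_serial8250_port *p)
  {
          int ret;

          ret = au_platform_setup(p);     /* -ENODEV stub when RT288X is off */
          if (ret < 0)
                  return ret;

          /*
           * p->iotype, serial_in/out, dl_read/dl_write, mapsize and bugs are
           * now filled in; register the platform device as usual.
           */
          return 0;
  }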
Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230511121029.13128-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/mips/alchemy/common/platform.c | 10 ++- drivers/tty/serial/8250/8250_early.c | 21 ----- drivers/tty/serial/8250/8250_of.c | 4 +- drivers/tty/serial/8250/8250_port.c | 78 ------------------- drivers/tty/serial/8250/8250_rt288x.c | 142 ++++++++++++++++++++++++++++++++++ drivers/tty/serial/8250/Makefile | 1 + drivers/tty/serial/Makefile | 2 +- include/linux/serial_8250.h | 8 +- 8 files changed, 161 insertions(+), 105 deletions(-) create mode 100644 drivers/tty/serial/8250/8250_rt288x.c (limited to 'include/linux') diff --git a/arch/mips/alchemy/common/platform.c b/arch/mips/alchemy/common/platform.c index b8f3397c59c9..d4ab34b3b404 100644 --- a/arch/mips/alchemy/common/platform.c +++ b/arch/mips/alchemy/common/platform.c @@ -51,9 +51,9 @@ static void alchemy_8250_pm(struct uart_port *port, unsigned int state, #define PORT(_base, _irq) \ { \ .mapbase = _base, \ + .mapsize = 0x1000, \ .irq = _irq, \ .regshift = 2, \ - .iotype = UPIO_AU, \ .flags = UPF_SKIP_TEST | UPF_IOREMAP | \ UPF_FIXED_TYPE, \ .type = PORT_16550A, \ @@ -124,8 +124,14 @@ static void __init alchemy_setup_uarts(int ctype) au1xx0_uart_device.dev.platform_data = ports; /* Fill up uartclk. */ - for (s = 0; s < c; s++) + for (s = 0; s < c; s++) { ports[s].uartclk = uartclk; + if (au_platform_setup(&ports[s]) < 0) { + kfree(ports); + printk(KERN_INFO "Alchemy: missing support for UARTs\n"); + return; + } + } if (platform_device_register(&au1xx0_uart_device)) printk(KERN_INFO "Alchemy: failed to register UARTs\n"); } diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 0ebde0ab8167..4299a8bd83d9 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -36,7 +36,6 @@ static unsigned int serial8250_early_in(struct uart_port *port, int offset) { - int reg_offset = offset; offset <<= port->regshift; switch (port->iotype) { @@ -50,8 +49,6 @@ static unsigned int serial8250_early_in(struct uart_port *port, int offset) return ioread32be(port->membase + offset); case UPIO_PORT: return inb(port->iobase + offset); - case UPIO_AU: - return port->serial_in(port, reg_offset); default: return 0; } @@ -59,7 +56,6 @@ static unsigned int serial8250_early_in(struct uart_port *port, int offset) static void serial8250_early_out(struct uart_port *port, int offset, int value) { - int reg_offset = offset; offset <<= port->regshift; switch (port->iotype) { @@ -78,9 +74,6 @@ static void serial8250_early_out(struct uart_port *port, int offset, int value) case UPIO_PORT: outb(value, port->iobase + offset); break; - case UPIO_AU: - port->serial_out(port, reg_offset, value); - break; } } @@ -199,17 +192,3 @@ OF_EARLYCON_DECLARE(omap8250, "ti,omap3-uart", early_omap8250_setup); OF_EARLYCON_DECLARE(omap8250, "ti,omap4-uart", early_omap8250_setup); #endif - -#ifdef CONFIG_SERIAL_8250_RT288X - -static int __init early_au_setup(struct earlycon_device *dev, const char *opt) -{ - dev->port.serial_in = au_serial_in; - dev->port.serial_out = au_serial_out; - dev->port.iotype = UPIO_AU; - dev->con->write = early_serial8250_write; - return 0; -} -OF_EARLYCON_DECLARE(palmchip, "ralink,rt2880-uart", early_au_setup); - -#endif diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 1b461fba15a3..c9f6bd7a7038 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -171,7 +171,9 
@@ static int of_platform_serial_setup(struct platform_device *ofdev, switch (type) { case PORT_RT2880: - port->iotype = UPIO_AU; + ret = rt288x_setup(port); + if (ret) + goto err_unprepare; break; } diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 344bd447639b..0cef9bfd0471 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -341,66 +341,6 @@ static void default_serial_dl_write(struct uart_8250_port *up, u32 value) serial_out(up, UART_DLM, value >> 8 & 0xff); } -#ifdef CONFIG_SERIAL_8250_RT288X - -#define UART_REG_UNMAPPED -1 - -/* Au1x00/RT288x UART hardware has a weird register layout */ -static const s8 au_io_in_map[8] = { - [UART_RX] = 0, - [UART_IER] = 2, - [UART_IIR] = 3, - [UART_LCR] = 5, - [UART_MCR] = 6, - [UART_LSR] = 7, - [UART_MSR] = 8, - [UART_SCR] = UART_REG_UNMAPPED, -}; - -static const s8 au_io_out_map[8] = { - [UART_TX] = 1, - [UART_IER] = 2, - [UART_FCR] = 4, - [UART_LCR] = 5, - [UART_MCR] = 6, - [UART_LSR] = UART_REG_UNMAPPED, - [UART_MSR] = UART_REG_UNMAPPED, - [UART_SCR] = UART_REG_UNMAPPED, -}; - -unsigned int au_serial_in(struct uart_port *p, int offset) -{ - if (offset >= ARRAY_SIZE(au_io_in_map)) - return UINT_MAX; - offset = au_io_in_map[offset]; - if (offset == UART_REG_UNMAPPED) - return UINT_MAX; - return __raw_readl(p->membase + (offset << p->regshift)); -} - -void au_serial_out(struct uart_port *p, int offset, int value) -{ - if (offset >= ARRAY_SIZE(au_io_out_map)) - return; - offset = au_io_out_map[offset]; - if (offset == UART_REG_UNMAPPED) - return; - __raw_writel(value, p->membase + (offset << p->regshift)); -} - -/* Au1x00 haven't got a standard divisor latch */ -static u32 au_serial_dl_read(struct uart_8250_port *up) -{ - return __raw_readl(up->port.membase + 0x28); -} - -static void au_serial_dl_write(struct uart_8250_port *up, u32 value) -{ - __raw_writel(value, up->port.membase + 0x28); -} - -#endif - static unsigned int hub6_serial_in(struct uart_port *p, int offset) { offset = offset << p->regshift; @@ -510,15 +450,6 @@ static void set_io_from_upio(struct uart_port *p) p->serial_out = mem32be_serial_out; break; -#ifdef CONFIG_SERIAL_8250_RT288X - case UPIO_AU: - p->serial_in = au_serial_in; - p->serial_out = au_serial_out; - up->dl_read = au_serial_dl_read; - up->dl_write = au_serial_dl_write; - break; -#endif - default: p->serial_in = io_serial_in; p->serial_out = io_serial_out; @@ -2968,11 +2899,6 @@ static unsigned int serial8250_port_size(struct uart_8250_port *pt) { if (pt->port.mapsize) return pt->port.mapsize; - if (pt->port.iotype == UPIO_AU) { - if (pt->port.type == PORT_RT2880) - return 0x100; - return 0x1000; - } if (is_omap1_8250(pt)) return 0x16 << pt->port.regshift; @@ -3222,10 +3148,6 @@ static void serial8250_config_port(struct uart_port *port, int flags) if (flags & UART_CONFIG_TYPE) autoconfig(up); - /* if access method is AU, it is a 16550 with a quirk */ - if (port->type == PORT_16550A && port->iotype == UPIO_AU) - up->bugs |= UART_BUG_NOMSR; - /* HW bugs may trigger IRQ while IIR == NO_INT */ if (port->type == PORT_TEGRA) up->bugs |= UART_BUG_NOMSR; diff --git a/drivers/tty/serial/8250/8250_rt288x.c b/drivers/tty/serial/8250/8250_rt288x.c new file mode 100644 index 000000000000..51b1cf5476dd --- /dev/null +++ b/drivers/tty/serial/8250/8250_rt288x.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RT288x/Au1xxx driver + */ + +#include +#include +#include +#include +#include +#include + +#include "8250.h" + +#define 
UART_REG_UNMAPPED -1 + +/* Au1x00/RT288x UART hardware has a weird register layout */ +static const s8 au_io_in_map[8] = { + [UART_RX] = 0, + [UART_IER] = 2, + [UART_IIR] = 3, + [UART_LCR] = 5, + [UART_MCR] = 6, + [UART_LSR] = 7, + [UART_MSR] = 8, + [UART_SCR] = UART_REG_UNMAPPED, +}; + +static const s8 au_io_out_map[8] = { + [UART_TX] = 1, + [UART_IER] = 2, + [UART_FCR] = 4, + [UART_LCR] = 5, + [UART_MCR] = 6, + [UART_LSR] = UART_REG_UNMAPPED, + [UART_MSR] = UART_REG_UNMAPPED, + [UART_SCR] = UART_REG_UNMAPPED, +}; + +static unsigned int au_serial_in(struct uart_port *p, int offset) +{ + if (offset >= ARRAY_SIZE(au_io_in_map)) + return UINT_MAX; + offset = au_io_in_map[offset]; + if (offset == UART_REG_UNMAPPED) + return UINT_MAX; + return __raw_readl(p->membase + (offset << p->regshift)); +} + +static void au_serial_out(struct uart_port *p, int offset, int value) +{ + if (offset >= ARRAY_SIZE(au_io_out_map)) + return; + offset = au_io_out_map[offset]; + if (offset == UART_REG_UNMAPPED) + return; + __raw_writel(value, p->membase + (offset << p->regshift)); +} + +/* Au1x00 haven't got a standard divisor latch */ +static u32 au_serial_dl_read(struct uart_8250_port *up) +{ + return __raw_readl(up->port.membase + 0x28); +} + +static void au_serial_dl_write(struct uart_8250_port *up, u32 value) +{ + __raw_writel(value, up->port.membase + 0x28); +} + +int au_platform_setup(struct plat_serial8250_port *p) +{ + p->iotype = UPIO_AU; + + p->serial_in = au_serial_in; + p->serial_out = au_serial_out; + p->dl_read = au_serial_dl_read; + p->dl_write = au_serial_dl_write; + + p->mapsize = 0x1000; + + p->bugs |= UART_BUG_NOMSR; + + return 0; +} +EXPORT_SYMBOL_GPL(au_platform_setup); + +int rt288x_setup(struct uart_port *p) +{ + struct uart_8250_port *up = up_to_u8250p(p); + + p->iotype = UPIO_AU; + + p->serial_in = au_serial_in; + p->serial_out = au_serial_out; + up->dl_read = au_serial_dl_read; + up->dl_write = au_serial_dl_write; + + p->mapsize = 0x100; + + up->bugs |= UART_BUG_NOMSR; + + return 0; +} +EXPORT_SYMBOL_GPL(rt288x_setup); + +#ifdef CONFIG_SERIAL_8250_CONSOLE +static void au_putc(struct uart_port *port, unsigned char c) +{ + unsigned int status; + + au_serial_out(port, UART_TX, c); + + for (;;) { + status = au_serial_in(port, UART_LSR); + if (uart_lsr_tx_empty(status)) + break; + cpu_relax(); + } +} + +static void au_early_serial8250_write(struct console *console, + const char *s, unsigned int count) +{ + struct earlycon_device *device = console->data; + struct uart_port *port = &device->port; + + uart_console_write(port, s, count, au_putc); +} + +static int __init early_au_setup(struct earlycon_device *dev, const char *opt) +{ + rt288x_setup(&dev->port); + dev->con->write = au_early_serial8250_write; + + return 0; +} +OF_EARLYCON_DECLARE(palmchip, "ralink,rt2880-uart", early_au_setup); +#endif + +MODULE_DESCRIPTION("RT288x/Au1xxx UART driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile index 4fc2fc1f41b6..628b75be312e 100644 --- a/drivers/tty/serial/8250/Makefile +++ b/drivers/tty/serial/8250/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_SERIAL_8250_DW) += 8250_dw.o obj-$(CONFIG_SERIAL_8250_EM) += 8250_em.o obj-$(CONFIG_SERIAL_8250_IOC3) += 8250_ioc3.o obj-$(CONFIG_SERIAL_8250_OMAP) += 8250_omap.o +obj-$(CONFIG_SERIAL_8250_RT288X) += 8250_rt288x.o obj-$(CONFIG_SERIAL_8250_LPC18XX) += 8250_lpc18xx.o obj-$(CONFIG_SERIAL_8250_MT6577) += 8250_mtk.o obj-$(CONFIG_SERIAL_8250_UNIPHIER) += 8250_uniphier.o diff --git 
a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index cd9afd9e3018..531ec3a19dae 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_SERIAL_SUNSAB) += sunsab.o obj-$(CONFIG_SERIAL_21285) += 21285.o # Now bring in any enabled 8250/16450/16550 type drivers. -obj-$(CONFIG_SERIAL_8250) += 8250/ +obj-y += 8250/ obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 42fc8f64f48e..eb44420b39ec 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -7,6 +7,7 @@ #ifndef _LINUX_SERIAL_8250_H #define _LINUX_SERIAL_8250_H +#include #include #include #include @@ -211,8 +212,11 @@ void serial8250_set_isa_configurator(void (*v)(int port, struct uart_port *up, u32 *capabilities)); #ifdef CONFIG_SERIAL_8250_RT288X -unsigned int au_serial_in(struct uart_port *p, int offset); -void au_serial_out(struct uart_port *p, int offset, int value); +int rt288x_setup(struct uart_port *p); +int au_platform_setup(struct plat_serial8250_port *p); +#else +static inline int rt288x_setup(struct uart_port *p) { return -ENODEV; } +static inline int au_platform_setup(struct plat_serial8250_port *p) { return -ENODEV; } #endif #endif -- cgit v1.2.3 From d5b3d02d0b107345f2a6ecb5b06f98356f5c97ab Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 12 May 2023 19:38:10 +0200 Subject: serial: Make uart_remove_one_port() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return value is only ever used as a return value for remove callbacks of platform drivers. This return value is ignored by the driver core. (The only effect is an error message, but uart_remove_one_port() already emitted one in this case.) So the return value isn't used at all and uart_remove_one_port() can be changed to return void without any loss. Also this better matches the Linux device model as remove functions are not supposed to fail. 
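A sketch of a typical platform driver remove callback after this change; the foo_ names are placeholders and follow the same pattern as the converted drivers below.

  #include <linux/platform_device.h>
  #include <linux/serial_core.h>

  static struct uart_driver foo_uart_driver;      /* registered at probe time */

  static int foo_serial_remove(struct platform_device *pdev)
  {
          struct uart_port *port = platform_get_drvdata(pdev);

          /* uart_remove_one_port() no longer returns a value to propagate. */
          uart_remove_one_port(&foo_uart_driver, port);

          return 0;
  }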
Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20230512173810.131447-3-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 5 ++--- drivers/tty/serial/clps711x.c | 4 +++- drivers/tty/serial/cpm_uart/cpm_uart_core.c | 5 ++++- drivers/tty/serial/imx.c | 4 +++- drivers/tty/serial/lantiq.c | 4 +++- drivers/tty/serial/serial_core.c | 6 +----- drivers/tty/serial/st-asc.c | 4 +++- drivers/tty/serial/uartlite.c | 12 ++++-------- drivers/tty/serial/xilinx_uartps.c | 5 ++--- include/linux/serial_core.h | 2 +- 10 files changed, 26 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 9cd7479b03c0..6e9192f122aa 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -3006,14 +3006,13 @@ static int atmel_serial_remove(struct platform_device *pdev) { struct uart_port *port = platform_get_drvdata(pdev); struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); - int ret = 0; tasklet_kill(&atmel_port->tasklet_rx); tasklet_kill(&atmel_port->tasklet_tx); device_init_wakeup(&pdev->dev, 0); - ret = uart_remove_one_port(&atmel_uart, port); + uart_remove_one_port(&atmel_uart, port); kfree(atmel_port->rx_ring.buf); @@ -3023,7 +3022,7 @@ static int atmel_serial_remove(struct platform_device *pdev) pdev->dev.of_node = NULL; - return ret; + return 0; } static SIMPLE_DEV_PM_OPS(atmel_serial_pm_ops, atmel_serial_suspend, diff --git a/drivers/tty/serial/clps711x.c b/drivers/tty/serial/clps711x.c index e190dce58f46..e49bc4019b50 100644 --- a/drivers/tty/serial/clps711x.c +++ b/drivers/tty/serial/clps711x.c @@ -514,7 +514,9 @@ static int uart_clps711x_remove(struct platform_device *pdev) { struct clps711x_port *s = platform_get_drvdata(pdev); - return uart_remove_one_port(&clps711x_uart, &s->port); + uart_remove_one_port(&clps711x_uart, &s->port); + + return 0; } static const struct of_device_id __maybe_unused clps711x_uart_dt_ids[] = { diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c index 349e7da643f0..66afa9bea6bf 100644 --- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c +++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c @@ -1431,7 +1431,10 @@ static int cpm_uart_probe(struct platform_device *ofdev) static int cpm_uart_remove(struct platform_device *ofdev) { struct uart_cpm_port *pinfo = platform_get_drvdata(ofdev); - return uart_remove_one_port(&cpm_reg, &pinfo->port); + + uart_remove_one_port(&cpm_reg, &pinfo->port); + + return 0; } static const struct of_device_id cpm_uart_match[] = { diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index c5e17569c3ad..b2bf3cb449f4 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2467,7 +2467,9 @@ static int imx_uart_remove(struct platform_device *pdev) { struct imx_port *sport = platform_get_drvdata(pdev); - return uart_remove_one_port(&imx_uart_uart_driver, &sport->port); + uart_remove_one_port(&imx_uart_uart_driver, &sport->port); + + return 0; } static void imx_uart_restore_context(struct imx_port *sport) diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c index a58e9277dfad..d413f97f7190 100644 --- a/drivers/tty/serial/lantiq.c +++ b/drivers/tty/serial/lantiq.c @@ -889,7 +889,9 @@ static int lqasc_remove(struct platform_device *pdev) { struct uart_port *port = platform_get_drvdata(pdev); - return uart_remove_one_port(&lqasc_reg, port); + 
uart_remove_one_port(&lqasc_reg, port); + + return 0; } static const struct ltq_soc_data soc_data_lantiq = { diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 54e82f476a2c..4b98d13555c0 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -3154,13 +3154,12 @@ EXPORT_SYMBOL(uart_add_one_port); * This unhooks (and hangs up) the specified port structure from the core * driver. No further calls will be made to the low-level code for this port. */ -int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) +void uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) { struct uart_state *state = drv->state + uport->line; struct tty_port *port = &state->port; struct uart_port *uart_port; struct tty_struct *tty; - int ret = 0; mutex_lock(&port_mutex); @@ -3176,7 +3175,6 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) if (!uart_port) { mutex_unlock(&port->mutex); - ret = -EINVAL; goto out; } uport->flags |= UPF_DEAD; @@ -3219,8 +3217,6 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *uport) mutex_unlock(&port->mutex); out: mutex_unlock(&port_mutex); - - return ret; } EXPORT_SYMBOL(uart_remove_one_port); diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index 5215e6910f68..dc2f2051435c 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -796,7 +796,9 @@ static int asc_serial_remove(struct platform_device *pdev) { struct uart_port *port = platform_get_drvdata(pdev); - return uart_remove_one_port(&asc_uart_driver, port); + uart_remove_one_port(&asc_uart_driver, port); + + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 94584e54ebbe..679574893ebe 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -685,18 +685,15 @@ static int ulite_assign(struct device *dev, int id, phys_addr_t base, int irq, * * @dev: pointer to device structure */ -static int ulite_release(struct device *dev) +static void ulite_release(struct device *dev) { struct uart_port *port = dev_get_drvdata(dev); - int rc = 0; if (port) { - rc = uart_remove_one_port(&ulite_uart_driver, port); + uart_remove_one_port(&ulite_uart_driver, port); dev_set_drvdata(dev, NULL); port->mapbase = 0; } - - return rc; } /** @@ -900,14 +897,13 @@ static int ulite_remove(struct platform_device *pdev) { struct uart_port *port = dev_get_drvdata(&pdev->dev); struct uartlite_data *pdata = port->private_data; - int rc; clk_disable_unprepare(pdata->clk); - rc = ulite_release(&pdev->dev); + ulite_release(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - return rc; + return 0; } /* work with hotplug and coldplug */ diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index 8e521c69a959..20a751663ef9 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1670,14 +1670,13 @@ static int cdns_uart_remove(struct platform_device *pdev) { struct uart_port *port = platform_get_drvdata(pdev); struct cdns_uart *cdns_uart_data = port->private_data; - int rc; /* Remove the cdns_uart port from the serial core */ #ifdef CONFIG_COMMON_CLK clk_notifier_unregister(cdns_uart_data->uartclk, &cdns_uart_data->clk_rate_change_nb); #endif - rc = uart_remove_one_port(cdns_uart_data->cdns_uart_driver, port); + 
uart_remove_one_port(cdns_uart_data->cdns_uart_driver, port); port->mapbase = 0; clk_disable_unprepare(cdns_uart_data->uartclk); clk_disable_unprepare(cdns_uart_data->pclk); @@ -1693,7 +1692,7 @@ static int cdns_uart_remove(struct platform_device *pdev) if (!--instances) uart_unregister_driver(cdns_uart_data->cdns_uart_driver); - return rc; + return 0; } static struct platform_driver cdns_uart_platform_driver = { diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 66ecec15a1bf..ddcdb5b8523e 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -853,7 +853,7 @@ void uart_console_write(struct uart_port *port, const char *s, int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); -int uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); +void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); -- cgit v1.2.3 From a7e3448086d580abadccff399316c6eb5ecdedbf Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 May 2023 10:21:08 -0700 Subject: net: phy: Allow drivers to always call into ->suspend() A few PHY drivers are currently attempting to not suspend the PHY when Wake-on-LAN is enabled, however that code is not currently executing at all due to an early check in phy_suspend(). This prevents PHY drivers from making an appropriate decisions and put the hardware into a low power state if desired. In order to allow the PHY drivers to opt into getting their ->suspend routine to be called, add a PHY_ALWAYS_CALL_SUSPEND bit which can be set. A boolean that tracks whether the PHY or the attached MAC has Wake-on-LAN enabled is also provided for convenience. If phydev::wol_enabled then the PHY shall not prevent its own Wake-on-LAN detection logic from working and shall not prevent the Ethernet MAC from receiving packets for matching. Reviewed-by: Simon Horman Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 5 +++-- include/linux/phy.h | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 17d0d0555a79..8852b0c53114 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1860,9 +1860,10 @@ int phy_suspend(struct phy_device *phydev) if (phydev->suspended) return 0; - /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); - if (wol.wolopts || (netdev && netdev->wol_enabled)) + phydev->wol_enabled = wol.wolopts || (netdev && netdev->wol_enabled); + /* If the device has WOL enabled, we cannot suspend the PHY */ + if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND)) return -EBUSY; if (!phydrv || !phydrv->suspend) diff --git a/include/linux/phy.h b/include/linux/phy.h index c5a0dc829714..e0df8b3c2bdb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -86,6 +86,7 @@ extern const int phy_10gbit_features_array[1]; #define PHY_IS_INTERNAL 0x00000001 #define PHY_RST_AFTER_CLK_EN 0x00000002 #define PHY_POLL_CABLE_TEST 0x00000004 +#define PHY_ALWAYS_CALL_SUSPEND 0x00000008 #define MDIO_DEVICE_IS_PHY 0x80000000 /** @@ -548,6 +549,8 @@ struct macsec_ops; * @downshifted_rate: Set true if link speed has been downshifted. 
* @is_on_sfp_module: Set true if PHY is located on an SFP module. * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY + * @wol_enabled: Set to true if the PHY or the attached MAC have Wake-on-LAN + * enabled. * @state: State of the PHY for management purposes * @dev_flags: Device-specific flags used by the PHY driver. * @@ -644,6 +647,7 @@ struct phy_device { unsigned downshifted_rate:1; unsigned is_on_sfp_module:1; unsigned mac_managed_pm:1; + unsigned wol_enabled:1; unsigned autoneg:1; /* The most recently read link state */ -- cgit v1.2.3 From 8baddaa9d4bac939004b5058f3ade7e2bf0a6e43 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 May 2023 10:21:09 -0700 Subject: net: phy: broadcom: Add support for Wake-on-LAN Add support for WAKE_UCAST, WAKE_MCAST, WAKE_BCAST, WAKE_MAGIC and WAKE_MAGICSECURE. This is only supported with the BCM54210E and compatible Ethernet PHYs. Using the in-band interrupt or an out of band GPIO interrupts are supported. Broadcom PHYs will generate a Wake-on-LAN level low interrupt on LED4 as soon as one of the supported patterns is being matched. That includes generating such an interrupt even if the PHY is operated during normal modes. If WAKE_UCAST is selected, this could lead to the LED4 interrupt firing up for every packet being received which is absolutely undesirable from a performance point of view. Because the Wake-on-LAN configuration can be set long before the system is actually put to sleep, we cannot have an interrupt service routine to clear on read the interrupt status register and ensure that new packet matches will be detected. It is desirable to enable the Wake-on-LAN interrupt as late as possible during the system suspend process such that we limit the number of interrupts to be handled by the system, but also conversely feed into the Linux's system suspend way of dealing with interrupts in and around the points of no return. Reviewed-by: Simon Horman Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm-phy-lib.c | 212 ++++++++++++++++++++++++++++++++++++++++++ drivers/net/phy/bcm-phy-lib.h | 5 + drivers/net/phy/broadcom.c | 126 ++++++++++++++++++++++++- include/linux/brcmphy.h | 55 +++++++++++ 4 files changed, 395 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index b2c0baa51f39..27c57f6ab211 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -6,12 +6,14 @@ #include "bcm-phy-lib.h" #include #include +#include #include #include #include #include #include #include +#include #define MII_BCM_CHANNEL_WIDTH 0x2000 #define BCM_CL45VEN_EEE_ADV 0x3c @@ -816,6 +818,216 @@ int bcm_phy_cable_test_get_status_rdb(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(bcm_phy_cable_test_get_status_rdb); +#define BCM54XX_WOL_SUPPORTED_MASK (WAKE_UCAST | \ + WAKE_MCAST | \ + WAKE_BCAST | \ + WAKE_MAGIC | \ + WAKE_MAGICSECURE) + +int bcm_phy_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + struct net_device *ndev = phydev->attached_dev; + u8 da[ETH_ALEN], mask[ETH_ALEN]; + unsigned int i; + u16 ctl; + int ret; + + /* Allow a MAC driver to play through its own Wake-on-LAN + * implementation + */ + if (wol->wolopts & ~BCM54XX_WOL_SUPPORTED_MASK) + return -EOPNOTSUPP; + + /* The PHY supports passwords of 4, 6 and 8 bytes in size, but Linux's + * ethtool only supports 6, for now. 
+ */ + BUILD_BUG_ON(sizeof(wol->sopass) != ETH_ALEN); + + /* Clear previous interrupts */ + ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_INT_STATUS); + if (ret < 0) + return ret; + + ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_MAIN_CTL); + if (ret < 0) + return ret; + + ctl = ret; + + if (!wol->wolopts) { + if (phy_interrupt_is_valid(phydev)) + disable_irq_wake(phydev->irq); + + /* Leave all interrupts disabled */ + ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_INT_MASK, + BCM54XX_WOL_ALL_INTRS); + if (ret < 0) + return ret; + + /* Disable the global Wake-on-LAN enable bit */ + ctl &= ~BCM54XX_WOL_EN; + + return bcm_phy_write_exp(phydev, BCM54XX_WOL_MAIN_CTL, ctl); + } + + /* Clear the previously configured mode and mask mode for Wake-on-LAN */ + ctl &= ~(BCM54XX_WOL_MODE_MASK << BCM54XX_WOL_MODE_SHIFT); + ctl &= ~(BCM54XX_WOL_MASK_MODE_MASK << BCM54XX_WOL_MASK_MODE_SHIFT); + ctl &= ~BCM54XX_WOL_DIR_PKT_EN; + ctl &= ~(BCM54XX_WOL_SECKEY_OPT_MASK << BCM54XX_WOL_SECKEY_OPT_SHIFT); + + /* When using WAKE_MAGIC, we program the magic pattern filter to match + * the device's MAC address and we accept any MAC DA in the Ethernet + * frame. + * + * When using WAKE_UCAST, WAKE_BCAST or WAKE_MCAST, we program the + * following: + * - WAKE_UCAST -> MAC DA is the device's MAC with a perfect match + * - WAKE_MCAST -> MAC DA is X1:XX:XX:XX:XX:XX where XX is don't care + * - WAKE_BCAST -> MAC DA is FF:FF:FF:FF:FF:FF with a perfect match + * + * Note that the Broadcast MAC DA is inherently going to match the + * multicast pattern being matched. + */ + memset(mask, 0, sizeof(mask)); + + if (wol->wolopts & WAKE_MCAST) { + memset(da, 0, sizeof(da)); + memset(mask, 0xff, sizeof(mask)); + da[0] = 0x01; + mask[0] = ~da[0]; + } else { + if (wol->wolopts & WAKE_UCAST) { + ether_addr_copy(da, ndev->dev_addr); + } else if (wol->wolopts & WAKE_BCAST) { + eth_broadcast_addr(da); + } else if (wol->wolopts & WAKE_MAGICSECURE) { + ether_addr_copy(da, wol->sopass); + } else if (wol->wolopts & WAKE_MAGIC) { + memset(da, 0, sizeof(da)); + memset(mask, 0xff, sizeof(mask)); + } + } + + for (i = 0; i < ETH_ALEN / 2; i++) { + if (wol->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) { + ret = bcm_phy_write_exp(phydev, + BCM54XX_WOL_MPD_DATA1(2 - i), + ndev->dev_addr[i * 2] << 8 | + ndev->dev_addr[i * 2 + 1]); + if (ret < 0) + return ret; + } + + ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_MPD_DATA2(2 - i), + da[i * 2] << 8 | da[i * 2 + 1]); + if (ret < 0) + return ret; + + ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_MASK(2 - i), + mask[i * 2] << 8 | mask[i * 2 + 1]); + if (ret) + return ret; + } + + if (wol->wolopts & WAKE_MAGICSECURE) { + ctl |= BCM54XX_WOL_SECKEY_OPT_6B << + BCM54XX_WOL_SECKEY_OPT_SHIFT; + ctl |= BCM54XX_WOL_MODE_SINGLE_MPDSEC << BCM54XX_WOL_MODE_SHIFT; + ctl |= BCM54XX_WOL_MASK_MODE_DA_FF << + BCM54XX_WOL_MASK_MODE_SHIFT; + } else { + if (wol->wolopts & WAKE_MAGIC) + ctl |= BCM54XX_WOL_MODE_SINGLE_MPD; + else + ctl |= BCM54XX_WOL_DIR_PKT_EN; + ctl |= BCM54XX_WOL_MASK_MODE_DA_ONLY << + BCM54XX_WOL_MASK_MODE_SHIFT; + } + + /* Globally enable Wake-on-LAN */ + ctl |= BCM54XX_WOL_EN | BCM54XX_WOL_CRC_CHK; + + ret = bcm_phy_write_exp(phydev, BCM54XX_WOL_MAIN_CTL, ctl); + if (ret < 0) + return ret; + + /* Enable WOL interrupt on LED4 */ + ret = bcm_phy_read_exp(phydev, BCM54XX_TOP_MISC_LED_CTL); + if (ret < 0) + return ret; + + ret |= BCM54XX_LED4_SEL_INTR; + ret = bcm_phy_write_exp(phydev, BCM54XX_TOP_MISC_LED_CTL, ret); + if (ret < 0) + return ret; + + /* Enable all Wake-on-LAN interrupt sources */ + ret = 
bcm_phy_write_exp(phydev, BCM54XX_WOL_INT_MASK, 0); + if (ret < 0) + return ret; + + if (phy_interrupt_is_valid(phydev)) + enable_irq_wake(phydev->irq); + + return 0; +} +EXPORT_SYMBOL_GPL(bcm_phy_set_wol); + +void bcm_phy_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol) +{ + struct net_device *ndev = phydev->attached_dev; + u8 da[ETH_ALEN]; + unsigned int i; + int ret; + u16 ctl; + + wol->supported = BCM54XX_WOL_SUPPORTED_MASK; + wol->wolopts = 0; + + ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_MAIN_CTL); + if (ret < 0) + return; + + ctl = ret; + + if (!(ctl & BCM54XX_WOL_EN)) + return; + + for (i = 0; i < sizeof(da) / 2; i++) { + ret = bcm_phy_read_exp(phydev, + BCM54XX_WOL_MPD_DATA2(2 - i)); + if (ret < 0) + return; + + da[i * 2] = ret >> 8; + da[i * 2 + 1] = ret & 0xff; + } + + if (ctl & BCM54XX_WOL_DIR_PKT_EN) { + if (is_broadcast_ether_addr(da)) + wol->wolopts |= WAKE_BCAST; + else if (is_multicast_ether_addr(da)) + wol->wolopts |= WAKE_MCAST; + else if (ether_addr_equal(da, ndev->dev_addr)) + wol->wolopts |= WAKE_UCAST; + } else { + ctl = (ctl >> BCM54XX_WOL_MODE_SHIFT) & BCM54XX_WOL_MODE_MASK; + switch (ctl) { + case BCM54XX_WOL_MODE_SINGLE_MPD: + wol->wolopts |= WAKE_MAGIC; + break; + case BCM54XX_WOL_MODE_SINGLE_MPDSEC: + wol->wolopts |= WAKE_MAGICSECURE; + memcpy(wol->sopass, da, sizeof(da)); + break; + default: + break; + } + } +} +EXPORT_SYMBOL_GPL(bcm_phy_get_wol); + MODULE_DESCRIPTION("Broadcom PHY Library"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Broadcom Corporation"); diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h index 729db441797a..c6fed43ec913 100644 --- a/drivers/net/phy/bcm-phy-lib.h +++ b/drivers/net/phy/bcm-phy-lib.h @@ -9,6 +9,8 @@ #include #include +struct ethtool_wolinfo; + /* 28nm only register definitions */ #define MISC_ADDR(base, channel) base, channel @@ -111,4 +113,7 @@ static inline void bcm_ptp_stop(struct bcm_ptp_private *priv) } #endif +int bcm_phy_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); +void bcm_phy_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol); + #endif /* _LINUX_BCM_PHY_LIB_H */ diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index ad71c88c87e7..418e6bc0e998 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -14,8 +14,12 @@ #include #include #include +#include #include #include +#include +#include +#include #define BRCM_PHY_MODEL(phydev) \ ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask) @@ -30,8 +34,17 @@ MODULE_LICENSE("GPL"); struct bcm54xx_phy_priv { u64 *stats; struct bcm_ptp_private *ptp; + int wake_irq; + bool wake_irq_enabled; }; +static bool bcm54xx_phy_can_wakeup(struct phy_device *phydev) +{ + struct bcm54xx_phy_priv *priv = phydev->priv; + + return phy_interrupt_is_valid(phydev) || priv->wake_irq >= 0; +} + static int bcm54xx_config_clock_delay(struct phy_device *phydev) { int rc, val; @@ -413,6 +426,16 @@ static int bcm54xx_config_init(struct phy_device *phydev) bcm54xx_ptp_config_init(phydev); + /* Acknowledge any left over interrupt and charge the device for + * wake-up. 
+ */ + err = bcm_phy_read_exp(phydev, BCM54XX_WOL_INT_STATUS); + if (err < 0) + return err; + + if (err) + pm_wakeup_event(&phydev->mdio.dev, 0); + return 0; } @@ -437,12 +460,39 @@ out: return ret; } +static int bcm54xx_set_wakeup_irq(struct phy_device *phydev, bool state) +{ + struct bcm54xx_phy_priv *priv = phydev->priv; + int ret = 0; + + if (!bcm54xx_phy_can_wakeup(phydev)) + return ret; + + if (priv->wake_irq_enabled != state) { + if (state) + ret = enable_irq_wake(priv->wake_irq); + else + ret = disable_irq_wake(priv->wake_irq); + priv->wake_irq_enabled = state; + } + + return ret; +} + static int bcm54xx_suspend(struct phy_device *phydev) { - int ret; + int ret = 0; bcm54xx_ptp_stop(phydev); + /* Acknowledge any Wake-on-LAN interrupt prior to suspend */ + ret = bcm_phy_read_exp(phydev, BCM54XX_WOL_INT_STATUS); + if (ret < 0) + return ret; + + if (phydev->wol_enabled) + return bcm54xx_set_wakeup_irq(phydev, true); + /* We cannot use a read/modify/write here otherwise the PHY gets into * a bad state where its LEDs keep flashing, thus defeating the purpose * of low power mode. @@ -456,7 +506,13 @@ static int bcm54xx_suspend(struct phy_device *phydev) static int bcm54xx_resume(struct phy_device *phydev) { - int ret; + int ret = 0; + + if (phydev->wol_enabled) { + ret = bcm54xx_set_wakeup_irq(phydev, false); + if (ret) + return ret; + } ret = bcm54xx_iddq_set(phydev, false); if (ret < 0) @@ -801,14 +857,54 @@ static int brcm_fet_suspend(struct phy_device *phydev) return err; } +static void bcm54xx_phy_get_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + /* We cannot wake-up if we do not have a dedicated PHY interrupt line + * or an out of band GPIO descriptor for wake-up. Zeroing + * wol->supported allows the caller (MAC driver) to play through and + * offer its own Wake-on-LAN scheme if available. + */ + if (!bcm54xx_phy_can_wakeup(phydev)) { + wol->supported = 0; + return; + } + + bcm_phy_get_wol(phydev, wol); +} + +static int bcm54xx_phy_set_wol(struct phy_device *phydev, + struct ethtool_wolinfo *wol) +{ + int ret; + + /* We cannot wake-up if we do not have a dedicated PHY interrupt line + * or an out of band GPIO descriptor for wake-up. Returning -EOPNOTSUPP + * allows the caller (MAC driver) to play through and offer its own + * Wake-on-LAN scheme if available. + */ + if (!bcm54xx_phy_can_wakeup(phydev)) + return -EOPNOTSUPP; + + ret = bcm_phy_set_wol(phydev, wol); + if (ret < 0) + return ret; + + return 0; +} + static int bcm54xx_phy_probe(struct phy_device *phydev) { struct bcm54xx_phy_priv *priv; + struct gpio_desc *wakeup_gpio; + int ret = 0; priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + priv->wake_irq = -ENXIO; + phydev->priv = priv; priv->stats = devm_kcalloc(&phydev->mdio.dev, @@ -821,7 +917,28 @@ static int bcm54xx_phy_probe(struct phy_device *phydev) if (IS_ERR(priv->ptp)) return PTR_ERR(priv->ptp); - return 0; + /* We cannot utilize the _optional variant here since we want to know + * whether the GPIO descriptor exists or not to advertise Wake-on-LAN + * support or not. 
+ */ + wakeup_gpio = devm_gpiod_get(&phydev->mdio.dev, "wakeup", GPIOD_IN); + if (PTR_ERR(wakeup_gpio) == -EPROBE_DEFER) + return PTR_ERR(wakeup_gpio); + + if (!IS_ERR(wakeup_gpio)) { + priv->wake_irq = gpiod_to_irq(wakeup_gpio); + ret = irq_set_irq_type(priv->wake_irq, IRQ_TYPE_LEVEL_LOW); + if (ret) + return ret; + } + + /* If we do not have a main interrupt or a side-band wake-up interrupt, + * then the device cannot be marked as wake-up capable. + */ + if (!bcm54xx_phy_can_wakeup(phydev)) + return 0; + + return device_init_wakeup(&phydev->mdio.dev, true); } static void bcm54xx_get_stats(struct phy_device *phydev, @@ -894,6 +1011,7 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54210E", /* PHY_GBIT_FEATURES */ + .flags = PHY_ALWAYS_CALL_SUSPEND, .get_sset_count = bcm_phy_get_sset_count, .get_strings = bcm_phy_get_strings, .get_stats = bcm54xx_get_stats, @@ -904,6 +1022,8 @@ static struct phy_driver broadcom_drivers[] = { .link_change_notify = bcm54xx_link_change_notify, .suspend = bcm54xx_suspend, .resume = bcm54xx_resume, + .get_wol = bcm54xx_phy_get_wol, + .set_wol = bcm54xx_phy_set_wol, }, { .phy_id = PHY_ID_BCM5461, .phy_id_mask = 0xfffffff0, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 9e77165f3ef6..e9afbfb6d7a5 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -89,6 +89,7 @@ #define MII_BCM54XX_EXP_SEL 0x17 /* Expansion register select */ #define MII_BCM54XX_EXP_SEL_TOP 0x0d00 /* TOP_MISC expansion register select */ #define MII_BCM54XX_EXP_SEL_SSD 0x0e00 /* Secondary SerDes select */ +#define MII_BCM54XX_EXP_SEL_WOL 0x0e00 /* Wake-on-LAN expansion select register */ #define MII_BCM54XX_EXP_SEL_ER 0x0f00 /* Expansion register select */ #define MII_BCM54XX_EXP_SEL_ETC 0x0d00 /* Expansion register spare + 2k mem */ @@ -253,6 +254,9 @@ #define BCM54XX_TOP_MISC_IDDQ_SD (1 << 2) #define BCM54XX_TOP_MISC_IDDQ_SR (1 << 3) +#define BCM54XX_TOP_MISC_LED_CTL (MII_BCM54XX_EXP_SEL_TOP + 0x0C) +#define BCM54XX_LED4_SEL_INTR BIT(1) + /* * BCM5482: Secondary SerDes registers */ @@ -272,6 +276,57 @@ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) #define BCM54612E_LED4_CLK125OUT_EN (1 << 1) + +/* Wake-on-LAN registers */ +#define BCM54XX_WOL_MAIN_CTL (MII_BCM54XX_EXP_SEL_WOL + 0x80) +#define BCM54XX_WOL_EN BIT(0) +#define BCM54XX_WOL_MODE_SINGLE_MPD 0 +#define BCM54XX_WOL_MODE_SINGLE_MPDSEC 1 +#define BCM54XX_WOL_MODE_DUAL 2 +#define BCM54XX_WOL_MODE_SHIFT 1 +#define BCM54XX_WOL_MODE_MASK 0x3 +#define BCM54XX_WOL_MP_MSB_FF_EN BIT(3) +#define BCM54XX_WOL_SECKEY_OPT_4B 0 +#define BCM54XX_WOL_SECKEY_OPT_6B 1 +#define BCM54XX_WOL_SECKEY_OPT_8B 2 +#define BCM54XX_WOL_SECKEY_OPT_SHIFT 4 +#define BCM54XX_WOL_SECKEY_OPT_MASK 0x3 +#define BCM54XX_WOL_L2_TYPE_CHK BIT(6) +#define BCM54XX_WOL_L4IPV4UDP_CHK BIT(7) +#define BCM54XX_WOL_L4IPV6UDP_CHK BIT(8) +#define BCM54XX_WOL_UDPPORT_CHK BIT(9) +#define BCM54XX_WOL_CRC_CHK BIT(10) +#define BCM54XX_WOL_SECKEY_MODE BIT(11) +#define BCM54XX_WOL_RST BIT(12) +#define BCM54XX_WOL_DIR_PKT_EN BIT(13) +#define BCM54XX_WOL_MASK_MODE_DA_FF 0 +#define BCM54XX_WOL_MASK_MODE_DA_MPD 1 +#define BCM54XX_WOL_MASK_MODE_DA_ONLY 2 +#define BCM54XX_WOL_MASK_MODE_MPD 3 +#define BCM54XX_WOL_MASK_MODE_SHIFT 14 +#define BCM54XX_WOL_MASK_MODE_MASK 0x3 + +#define BCM54XX_WOL_INNER_PROTO (MII_BCM54XX_EXP_SEL_WOL + 0x81) +#define BCM54XX_WOL_OUTER_PROTO (MII_BCM54XX_EXP_SEL_WOL + 0x82) +#define BCM54XX_WOL_OUTER_PROTO2 (MII_BCM54XX_EXP_SEL_WOL + 0x83) + +#define 
BCM54XX_WOL_MPD_DATA1(x) (MII_BCM54XX_EXP_SEL_WOL + 0x84 + (x)) +#define BCM54XX_WOL_MPD_DATA2(x) (MII_BCM54XX_EXP_SEL_WOL + 0x87 + (x)) +#define BCM54XX_WOL_SEC_KEY_8B (MII_BCM54XX_EXP_SEL_WOL + 0x8A) +#define BCM54XX_WOL_MASK(x) (MII_BCM54XX_EXP_SEL_WOL + 0x8B + (x)) +#define BCM54XX_SEC_KEY_STORE(x) (MII_BCM54XX_EXP_SEL_WOL + 0x8E) +#define BCM54XX_WOL_SHARED_CNT (MII_BCM54XX_EXP_SEL_WOL + 0x92) + +#define BCM54XX_WOL_INT_MASK (MII_BCM54XX_EXP_SEL_WOL + 0x93) +#define BCM54XX_WOL_PKT1 BIT(0) +#define BCM54XX_WOL_PKT2 BIT(1) +#define BCM54XX_WOL_DIR BIT(2) +#define BCM54XX_WOL_ALL_INTRS (BCM54XX_WOL_PKT1 | \ + BCM54XX_WOL_PKT2 | \ + BCM54XX_WOL_DIR) + +#define BCM54XX_WOL_INT_STATUS (MII_BCM54XX_EXP_SEL_WOL + 0x94) + /*****************************************************************************/ /* Fast Ethernet Transceiver definitions. */ /*****************************************************************************/ -- cgit v1.2.3 From 672cde9ef80ffde9e76d38f7aa2b287c4a18de9a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 21 Apr 2023 08:46:11 +0300 Subject: iio: fix doc for iio_gts_find_sel_by_int_time The kerneldoc for iio_gts_find_sel_by_int_time() has an error. Documentation states that function is searching a selector for a HW-gain while it is searching a selector for an integration time. Fix the documentation by saying the function is looking for a selector for an integration time. Fixes: 38416c28e168 ("iio: light: Add gain-time-scale helpers") Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/ZEIjI4YUzqPZk/9X@fedora Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-gts-helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio-gts-helper.h b/include/linux/iio/iio-gts-helper.h index dd64e544a3da..9cb6c80dea71 100644 --- a/include/linux/iio/iio-gts-helper.h +++ b/include/linux/iio/iio-gts-helper.h @@ -135,7 +135,7 @@ static inline int iio_gts_find_int_time_by_sel(struct iio_gts *gts, int sel) /** * iio_gts_find_sel_by_int_time - find selector matching integration time * @gts: Gain time scale descriptor - * @gain: HW-gain for which matching selector is searched for + * @time: Integration time for which matching selector is searched for * * Return: a selector matching given integration time or -EINVAL if * selector was not found. -- cgit v1.2.3 From 9445368bca2f62cadfcf98e06219f784ae94dce0 Mon Sep 17 00:00:00 2001 From: Marius Hoch Date: Sun, 16 Apr 2023 01:11:25 +0200 Subject: iio: accel: st_accel: Add LSM303D The lsm303d has the same register mapping as the lsm9ds0, thus we can just re-use that. Tested on a Lenovo Yoga Tablet 2 1051-F. 
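The new device name is only reachable once the I2C/SPI glue tables reference it; those files are not shown in this include/linux-limited log, but a hypothetical id-table entry (illustrative only, not part of this patch) might look like:

static const struct i2c_device_id st_accel_id_table[] = {
	{ LSM9DS0_IMU_DEV_NAME },
	{ LSM303D_IMU_DEV_NAME },	/* reuses the lsm9ds0 settings entry */
	{ },
};
MODULE_DEVICE_TABLE(i2c, st_accel_id_table);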
Signed-off-by: Marius Hoch Reviewed-by: Linus Walleij Tested-by: Hans de Goede Link: https://lore.kernel.org/r/20230415231130.115094-2-mail@mariushoch.de Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 1 + include/linux/iio/common/st_sensors.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 5f7d81b44b1d..7c4f58c90f94 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -1007,6 +1007,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LSM9DS0_IMU_DEV_NAME, + [1] = LSM303D_IMU_DEV_NAME, }, .ch = (struct iio_chan_spec *)st_accel_16bit_channels, .odr = { diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index f5f3ee57bc70..607c3a89a647 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -22,6 +22,7 @@ #include #define LSM9DS0_IMU_DEV_NAME "lsm9ds0" +#define LSM303D_IMU_DEV_NAME "lsm303d" /* * Buffer size max case: 2bytes per channel, 3 channels in total + -- cgit v1.2.3 From 69ee1fb21340cb83df531175f298010697a87448 Mon Sep 17 00:00:00 2001 From: Marius Hoch Date: Sun, 16 Apr 2023 01:11:29 +0200 Subject: iio: Comment that the LSM303D also has the Magnetometer DRDY Per its datasheet, the LSM303D also features that pin. Signed-off-by: Marius Hoch Reviewed-by: Linus Walleij Tested-by: Hans de Goede Link: https://lore.kernel.org/r/20230415231130.115094-6-mail@mariushoch.de Signed-off-by: Jonathan Cameron --- include/linux/platform_data/st_sensors_pdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index 897051e51b78..a657830232ae 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -15,7 +15,7 @@ * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2). * Available only for accelerometer, magnetometer and pressure sensors. * Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet). - * Magnetometer DRDY is supported only on LSM9DS0. + * Magnetometer DRDY is supported only on LSM9DS0 and LSM303D. * @open_drain: set the interrupt line to be open drain if possible. * @spi_3wire: enable spi-3wire mode. * @pullups: enable/disable i2c controller pullup resistors. -- cgit v1.2.3 From 123627ad03d9915f6a6ecb69bb86a80da69ee972 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Tue, 2 May 2023 01:17:33 +0200 Subject: iio: core: Point users of extend_name field to read_label callback As mentioned and discussed in [1] extend_name should not be used for full channel labels (and most drivers seem to only use it to express a short type of a channel) as this affects sysfs filenames, while the label name is supposed to be extracted from the *_label sysfs file instead. This appears to have been unclear to some drivers as extend_name is also used when read_label is unset, achieving an initial goal of providing sensible names in *_label sysfs files without noticing that sysfs filenames are (negatively and likely unintentionally) affected as well. Point readers of iio_chan_spec::extend_name to iio_info::read_label by mentioning deprecation and side-effects of this field. 
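As a rough sketch of the preferred pattern (driver name, label table and channel layout here are illustrative, not taken from this patch), a driver reports per-channel labels through iio_info::read_label instead of extend_name:

static const char * const foo_labels[] = { "vin0", "vin1" };

static int foo_read_label(struct iio_dev *indio_dev,
			  struct iio_chan_spec const *chan, char *label)
{
	/* Exposed through in_voltageX_label; sysfs file names stay generic */
	return sprintf(label, "%s\n", foo_labels[chan->channel]);
}

static const struct iio_info foo_info = {
	.read_label = foo_read_label,
};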
[1]: https://lore.kernel.org/linux-arm-msm/20221221223432.si2aasbleiicayfl@SoMainline.org/ Suggested-by: Jonathan Cameron Signed-off-by: Marijn Suijten Link: https://lore.kernel.org/r/20230502-iio-adc-propagate-fw-node-label-v3-1-6be5db6e6b5a@somainline.org Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 81413cd3a3e7..6fc06063505a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -221,6 +221,9 @@ struct iio_event_spec { * @extend_name: Allows labeling of channel attributes with an * informative name. Note this has no effect codes etc, * unlike modifiers. + * This field is deprecated in favour of providing + * iio_info->read_label() to override the label, which + * unlike @extend_name does not affect sysfs filenames. * @datasheet_name: A name used in in-kernel mapping of channels. It should * correspond to the first name that the channel is referred * to by in the datasheet (e.g. IND), or the nearest -- cgit v1.2.3 From b51f4113ebb02011f0ca86abc3134b28d2071b6a Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 11 May 2023 09:12:12 +0800 Subject: net: introduce and use skb_frag_fill_page_desc() Most users use __skb_frag_set_page()/skb_frag_off_set()/ skb_frag_size_set() to fill the page desc for a skb frag. Introduce skb_frag_fill_page_desc() to do that. net/bpf/test_run.c does not call skb_frag_off_set() to set the offset, "copy_from_user(page_address(page), ...)" and 'shinfo' being part of the 'data' kzalloced in bpf_test_init() suggest that it is assuming offset to be initialized as zero, so call skb_frag_fill_page_desc() with offset being zero for this case. Also, skb_frag_set_page() is not used anymore, so remove it. Signed-off-by: Yunsheng Lin Reviewed-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 6 ++--- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++-- drivers/net/ethernet/chelsio/cxgb3/sge.c | 5 ++-- drivers/net/ethernet/emulex/benet/be_main.c | 32 +++++++++++++----------- drivers/net/ethernet/freescale/enetc/enetc.c | 5 ++-- drivers/net/ethernet/fungible/funeth/funeth_rx.c | 5 ++-- drivers/net/ethernet/marvell/mvneta.c | 5 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +-- drivers/net/ethernet/sun/cassini.c | 8 ++---- drivers/net/virtio_net.c | 4 +-- drivers/net/vmxnet3/vmxnet3_drv.c | 4 +-- drivers/net/xen-netback/netback.c | 4 +-- include/linux/skbuff.h | 27 ++++++++------------ net/bpf/test_run.c | 3 +-- net/core/gro.c | 4 +-- net/core/pktgen.c | 13 ++++++---- net/core/skbuff.c | 7 +++--- net/tls/tls_device.c | 10 +++----- net/xfrm/xfrm_ipcomp.c | 5 +--- 19 files changed, 64 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 7f933175cbda..4de22eed099a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -532,10 +532,10 @@ static bool aq_add_rx_fragment(struct device *dev, buff_->rxdata.pg_off, buff_->len, DMA_FROM_DEVICE); - skb_frag_off_set(frag, buff_->rxdata.pg_off); - skb_frag_size_set(frag, buff_->len); sinfo->xdp_frags_size += buff_->len; - __skb_frag_set_page(frag, buff_->rxdata.page); + skb_frag_fill_page_desc(frag, buff_->rxdata.page, + buff_->rxdata.pg_off, + buff_->len); buff_->is_cleaned = 1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dcd9367f05af..efaff5018af8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1085,9 +1085,8 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; cons_rx_buf = &rxr->rx_agg_ring[cons]; - skb_frag_off_set(frag, cons_rx_buf->offset); - skb_frag_size_set(frag, frag_len); - __skb_frag_set_page(frag, cons_rx_buf->page); + skb_frag_fill_page_desc(frag, cons_rx_buf->page, + cons_rx_buf->offset, frag_len); shinfo->nr_frags = i + 1; __clear_bit(cons, rxr->rx_agg_bmap); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index efa7f401529e..2e9a74fe0970 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2184,9 +2184,8 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, len -= offset; rx_frag += nr_frags; - __skb_frag_set_page(rx_frag, sd->pg_chunk.page); - skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset); - skb_frag_size_set(rx_frag, len); + skb_frag_fill_page_desc(rx_frag, sd->pg_chunk.page, + sd->pg_chunk.offset + offset, len); skb->len += len; skb->data_len += len; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e408bcc88de..3164ed205cf7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2343,11 +2343,10 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb, hdr_len = ETH_HLEN; memcpy(skb->data, start, hdr_len); skb_shinfo(skb)->nr_frags = 1; - skb_frag_set_page(skb, 0, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[0], - page_info->page_offset + hdr_len); - skb_frag_size_set(&skb_shinfo(skb)->frags[0], - curr_frag_len - hdr_len); + 
skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0], + page_info->page, + page_info->page_offset + hdr_len, + curr_frag_len - hdr_len); skb->data_len = curr_frag_len - hdr_len; skb->truesize += rx_frag_size; skb->tail += hdr_len; @@ -2369,16 +2368,17 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb, if (page_info->page_offset == 0) { /* Fresh page */ j++; - skb_frag_set_page(skb, j, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[j], - page_info->page_offset); - skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j], + page_info->page, + page_info->page_offset, + curr_frag_len); skb_shinfo(skb)->nr_frags++; } else { put_page(page_info->page); + skb_frag_size_add(&skb_shinfo(skb)->frags[j], + curr_frag_len); } - skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); skb->len += curr_frag_len; skb->data_len += curr_frag_len; skb->truesize += rx_frag_size; @@ -2451,14 +2451,16 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo, if (i == 0 || page_info->page_offset == 0) { /* First frag or Fresh page */ j++; - skb_frag_set_page(skb, j, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[j], - page_info->page_offset); - skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j], + page_info->page, + page_info->page_offset, + curr_frag_len); } else { put_page(page_info->page); + skb_frag_size_add(&skb_shinfo(skb)->frags[j], + curr_frag_len); } - skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); + skb->truesize += rx_frag_size; remaining -= curr_frag_len; memset(page_info, 0, sizeof(*page_info)); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3c4fa26f0f9b..63854294ac33 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1445,9 +1445,8 @@ static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, xdp_buff_set_frag_pfmemalloc(xdp_buff); frag = &shinfo->frags[shinfo->nr_frags]; - skb_frag_off_set(frag, rx_swbd->page_offset); - skb_frag_size_set(frag, size); - __skb_frag_set_page(frag, rx_swbd->page); + skb_frag_fill_page_desc(frag, rx_swbd->page, rx_swbd->page_offset, + size); shinfo->nr_frags++; } diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c index 29a6c2ede43a..7e2584895de3 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c @@ -323,9 +323,8 @@ static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len, if (ref_ok) ref_ok |= buf->node; - __skb_frag_set_page(frags, buf->page); - skb_frag_off_set(frags, q->buf_offset); - skb_frag_size_set(frags++, frag_len); + skb_frag_fill_page_desc(frags++, buf->page, q->buf_offset, + frag_len); tot_len -= frag_len; if (!tot_len) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 6c6b66d3ea6e..e2abc00d0472 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2376,9 +2376,8 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; - skb_frag_off_set(frag, pp->rx_offset_correction); - skb_frag_size_set(frag, data_len); - __skb_frag_set_page(frag, page); + skb_frag_fill_page_desc(frag, page, + pp->rx_offset_correction, data_len); if 
(!xdp_buff_has_frags(xdp)) { sinfo->xdp_frags_size = *size; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 69634829558e..704b022cd1f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -491,9 +491,7 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, frag_page->page); - skb_frag_off_set(frag, frag_offset); - skb_frag_size_set(frag, len); + skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); if (page_is_pfmemalloc(frag_page->page)) xdp_buff_set_frag_pfmemalloc(xdp); diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 4ef05bad4613..2d52f54ebb45 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1998,10 +1998,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->truesize += hlen - swivel; skb->len += hlen - swivel; - __skb_frag_set_page(frag, page->buffer); + skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); __skb_frag_ref(frag); - skb_frag_off_set(frag, off); - skb_frag_size_set(frag, hlen - swivel); /* any more data? */ if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { @@ -2024,10 +2022,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->len += hlen; frag++; - __skb_frag_set_page(frag, page->buffer); + skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); __skb_frag_ref(frag); - skb_frag_off_set(frag, 0); - skb_frag_size_set(frag, hlen); RX_USED_ADD(page, hlen + cp->crc_size); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 97241006d64a..29eccc8ff41f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1272,9 +1272,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, } frag = &shinfo->frags[shinfo->nr_frags++]; - __skb_frag_set_page(frag, page); - skb_frag_off_set(frag, offset); - skb_frag_size_set(frag, len); + skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) xdp_buff_set_frag_pfmemalloc(xdp); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index f2b76ee866a4..7fa74b8b2100 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -686,9 +686,7 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); - __skb_frag_set_page(frag, rbi->page); - skb_frag_off_set(frag, 0); - skb_frag_size_set(frag, rcd->len); + skb_frag_fill_page_desc(frag, rbi->page, 0, rcd->len); skb->data_len += rcd->len; skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index c1501f41e2d8..3d79b35eb577 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1128,9 +1128,7 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s BUG(); offset += len; - __skb_frag_set_page(&frags[i], page); - skb_frag_off_set(&frags[i], 0); - skb_frag_size_set(&frags[i], len); + skb_frag_fill_page_desc(&frags[i], page, 0, len); } /* Release all the original (foreign) frags. 
*/ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 738776ab8838..30be21c7d05f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2411,6 +2411,15 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_page_desc(skb_frag_t *frag, + struct page *page, + int off, int size) +{ + frag->bv_page = page; + frag->bv_offset = off; + skb_frag_size_set(frag, size); +} + static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) @@ -2422,9 +2431,7 @@ static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_page_desc(frag, page, off, size); } /** @@ -3496,20 +3503,6 @@ static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) frag->bv_page = page; } -/** - * skb_frag_set_page - sets the page contained in a paged fragment of an skb - * @skb: the buffer - * @f: the fragment offset - * @page: the page to set - * - * Sets the @f'th fragment of @skb to contain @page. - */ -static inline void skb_frag_set_page(struct sk_buff *skb, int f, - struct page *page) -{ - __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page); -} - bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index e79e3a415ca9..98143b86a9dd 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1415,11 +1415,10 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, page); data_len = min_t(u32, kattr->test.data_size_in - size, PAGE_SIZE); - skb_frag_size_set(frag, data_len); + skb_frag_fill_page_desc(frag, page, 0, data_len); if (copy_from_user(page_address(page), data_in + size, data_len)) { diff --git a/net/core/gro.c b/net/core/gro.c index 2d84165cb4f1..6783a47a6136 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -239,9 +239,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; - __skb_frag_set_page(frag, page); - skb_frag_off_set(frag, first_offset); - skb_frag_size_set(frag, first_size); + skb_frag_fill_page_desc(frag, page, first_offset, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 760238196db1..f56b8d697014 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2785,14 +2785,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, break; } get_page(pkt_dev->page); - skb_frag_set_page(skb, i, pkt_dev->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0); + /*last fragment, fill rest of data*/ if (i == (frags - 1)) - skb_frag_size_set(&skb_shinfo(skb)->frags[i], - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE)); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, + (datalen < PAGE_SIZE ? 
+ datalen : PAGE_SIZE)); else - skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, frag_len); + datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 01b48e68aca0..6724a84ebb09 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4234,10 +4234,9 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) struct page *page; page = virt_to_head_page(frag_skb->head); - __skb_frag_set_page(&head_frag, page); - skb_frag_off_set(&head_frag, frag_skb->data - - (unsigned char *)page_address(page)); - skb_frag_size_set(&head_frag, skb_headlen(frag_skb)); + skb_frag_fill_page_desc(&head_frag, page, frag_skb->data - + (unsigned char *)page_address(page), + skb_headlen(frag_skb)); return head_frag; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a7cc4f9faac2..daeff54bdbfa 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -268,9 +268,8 @@ static void tls_append_frag(struct tls_record_info *record, skb_frag_size_add(frag, size); } else { ++frag; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + size); ++record->num_frags; get_page(pfrag->page); } @@ -357,9 +356,8 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, return -ENOMEM; frag = &record->frags[0]; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, prepend_size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + prepend_size); get_page(pfrag->page); pfrag->offset += prepend_size; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 80143360bf09..9c0fa0e1786a 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -74,14 +74,11 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) if (!page) return -ENOMEM; - __skb_frag_set_page(frag, page); - len = PAGE_SIZE; if (dlen < len) len = dlen; - skb_frag_off_set(frag, 0); - skb_frag_size_set(frag, len); + skb_frag_fill_page_desc(frag, page, 0, len); memcpy(skb_frag_address(frag), scratch, len); skb->truesize += len; -- cgit v1.2.3 From 278fda0d52f67244044384abd7dd5b3a5b3a5604 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 11 May 2023 09:12:13 +0800 Subject: net: remove __skb_frag_set_page() The remaining users calling __skb_frag_set_page() with page being NULL seems to be doing defensive programming, as shinfo->nr_frags is already decremented, so remove them. Signed-off-by: Yunsheng Lin Reviewed-by: Leon Romanovsky Reviewed-by: Michael Chan Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2.c | 1 - drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +---- include/linux/skbuff.h | 12 ------------ 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 466e1d62bcf6..0d917a9699c5 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2955,7 +2955,6 @@ bnx2_reuse_rx_skb_pages(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, shinfo = skb_shinfo(skb); shinfo->nr_frags--; page = skb_frag_page(&shinfo->frags[shinfo->nr_frags]); - __skb_frag_set_page(&shinfo->frags[shinfo->nr_frags], NULL); cons_rx_pg->page = page; dev_kfree_skb(skb); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index efaff5018af8..f42e51bd3e42 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1102,10 +1102,7 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp, xdp_buff_set_frag_pfmemalloc(xdp); if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) { - unsigned int nr_frags; - - nr_frags = --shinfo->nr_frags; - __skb_frag_set_page(&shinfo->frags[nr_frags], NULL); + --shinfo->nr_frags; cons_rx_buf->page = page; /* Update prod since possibly some pages have been diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30be21c7d05f..00e8c435fa1a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3491,18 +3491,6 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto, fragto->bv_page = fragfrom->bv_page; } -/** - * __skb_frag_set_page - sets the page contained in a paged fragment - * @frag: the paged fragment - * @page: the page to set - * - * Sets the fragment @frag to contain @page. - */ -static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) -{ - frag->bv_page = page; -} - bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** -- cgit v1.2.3 From a0b7955310a445fc0d45a0ac576bad8720cd6057 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 12 May 2023 17:58:37 +0100 Subject: net: phylink: constify fwnode arguments Both phylink_create() and phylink_fwnode_phy_connect() do not modify the fwnode argument that they are passed, so lets constify these. Reviewed-by: Simon Horman Signed-off-by: Russell King (Oracle) Signed-off-by: David S. 
Miller --- drivers/net/phy/phylink.c | 11 ++++++----- include/linux/phylink.h | 9 +++++---- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a4111f1be375..cf53096047e6 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -708,7 +708,7 @@ static int phylink_validate(struct phylink *pl, unsigned long *supported, } static int phylink_parse_fixedlink(struct phylink *pl, - struct fwnode_handle *fwnode) + const struct fwnode_handle *fwnode) { struct fwnode_handle *fixed_node; bool pause, asym_pause, autoneg; @@ -819,7 +819,8 @@ static int phylink_parse_fixedlink(struct phylink *pl, return 0; } -static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode) +static int phylink_parse_mode(struct phylink *pl, + const struct fwnode_handle *fwnode) { struct fwnode_handle *dn; const char *managed; @@ -1441,7 +1442,7 @@ static void phylink_fixed_poll(struct timer_list *t) static const struct sfp_upstream_ops sfp_phylink_ops; static int phylink_register_sfp(struct phylink *pl, - struct fwnode_handle *fwnode) + const struct fwnode_handle *fwnode) { struct sfp_bus *bus; int ret; @@ -1480,7 +1481,7 @@ static int phylink_register_sfp(struct phylink *pl, * must use IS_ERR() to check for errors from this function. */ struct phylink *phylink_create(struct phylink_config *config, - struct fwnode_handle *fwnode, + const struct fwnode_handle *fwnode, phy_interface_t iface, const struct phylink_mac_ops *mac_ops) { @@ -1809,7 +1810,7 @@ EXPORT_SYMBOL_GPL(phylink_of_phy_connect); * Returns 0 on success or a negative errno. */ int phylink_fwnode_phy_connect(struct phylink *pl, - struct fwnode_handle *fwnode, + const struct fwnode_handle *fwnode, u32 flags) { struct fwnode_handle *phy_fwnode; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 71755c66c162..bb782f05ad08 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -568,16 +568,17 @@ void phylink_generic_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); -struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, - phy_interface_t iface, - const struct phylink_mac_ops *mac_ops); +struct phylink *phylink_create(struct phylink_config *, + const struct fwnode_handle *, + phy_interface_t, + const struct phylink_mac_ops *); void phylink_destroy(struct phylink *); bool phylink_expects_phy(struct phylink *pl); int phylink_connect_phy(struct phylink *, struct phy_device *); int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags); int phylink_fwnode_phy_connect(struct phylink *pl, - struct fwnode_handle *fwnode, + const struct fwnode_handle *fwnode, u32 flags); void phylink_disconnect_phy(struct phylink *); -- cgit v1.2.3 From a7eb54d44045d424624d3ac7d02feb8ef96744ec Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 12 May 2023 22:46:46 +0200 Subject: ata: libata: Make ata_platform_remove_one return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function returned zero unconditionally, so the function returning an int is something between useless and irritating. With the goal to make platform drivers' remove function return void, it's helpful to convert the function accordingly. This converts several drivers to the new .remove_new callback that was introduced to smoothen the platform driver conversion. 
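As a minimal sketch of the resulting pattern (driver and function names are placeholders, not from this series), a converted platform driver wires a void remove callback through .remove_new:

static void foo_ahci_remove(struct platform_device *pdev)
{
	/* ata_host teardown cannot fail, so there is no value to return */
	ata_platform_remove_one(pdev);
}

static struct platform_driver foo_ahci_driver = {
	/* .probe unchanged and omitted here for brevity */
	.remove_new = foo_ahci_remove,
	.driver = {
		.name = "foo-ahci",
	},
};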
Signed-off-by: Uwe Kleine-König Acked-by: Jernej Skrabec Acked-by: Serge Semin Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/ahci_brcm.c | 6 +++--- drivers/ata/ahci_ceva.c | 2 +- drivers/ata/ahci_da850.c | 2 +- drivers/ata/ahci_dm816.c | 2 +- drivers/ata/ahci_dwc.c | 2 +- drivers/ata/ahci_imx.c | 2 +- drivers/ata/ahci_mtk.c | 2 +- drivers/ata/ahci_mvebu.c | 2 +- drivers/ata/ahci_platform.c | 2 +- drivers/ata/ahci_qoriq.c | 2 +- drivers/ata/ahci_seattle.c | 2 +- drivers/ata/ahci_st.c | 2 +- drivers/ata/ahci_sunxi.c | 2 +- drivers/ata/ahci_tegra.c | 2 +- drivers/ata/ahci_xgene.c | 2 +- drivers/ata/libata-core.c | 4 +--- drivers/ata/pata_ixp4xx_cf.c | 2 +- drivers/ata/pata_of_platform.c | 2 +- drivers/ata/pata_platform.c | 2 +- drivers/ata/sata_highbank.c | 2 +- include/linux/libata.h | 2 +- 21 files changed, 23 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index 4e3dc2b6d67f..70c3a33eee6f 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -544,7 +544,7 @@ out_reset: return ret; } -static int brcm_ahci_remove(struct platform_device *pdev) +static void brcm_ahci_remove(struct platform_device *pdev) { struct ata_host *host = dev_get_drvdata(&pdev->dev); struct ahci_host_priv *hpriv = host->private_data; @@ -552,7 +552,7 @@ static int brcm_ahci_remove(struct platform_device *pdev) brcm_sata_phys_disable(priv); - return ata_platform_remove_one(pdev); + ata_platform_remove_one(pdev); } static void brcm_ahci_shutdown(struct platform_device *pdev) @@ -573,7 +573,7 @@ static SIMPLE_DEV_PM_OPS(ahci_brcm_pm_ops, brcm_ahci_suspend, brcm_ahci_resume); static struct platform_driver brcm_ahci_driver = { .probe = brcm_ahci_probe, - .remove = brcm_ahci_remove, + .remove_new = brcm_ahci_remove, .shutdown = brcm_ahci_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index bc027468decb..c2b6be083af4 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -369,7 +369,7 @@ MODULE_DEVICE_TABLE(of, ceva_ahci_of_match); static struct platform_driver ceva_ahci_driver = { .probe = ceva_ahci_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ceva_ahci_of_match, diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c index ca0924dc5bd2..55a6627d5450 100644 --- a/drivers/ata/ahci_da850.c +++ b/drivers/ata/ahci_da850.c @@ -238,7 +238,7 @@ MODULE_DEVICE_TABLE(of, ahci_da850_of_match); static struct platform_driver ahci_da850_driver = { .probe = ahci_da850_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_da850_of_match, diff --git a/drivers/ata/ahci_dm816.c b/drivers/ata/ahci_dm816.c index b08547b877a1..4cb70064fb99 100644 --- a/drivers/ata/ahci_dm816.c +++ b/drivers/ata/ahci_dm816.c @@ -182,7 +182,7 @@ MODULE_DEVICE_TABLE(of, ahci_dm816_of_match); static struct platform_driver ahci_dm816_driver = { .probe = ahci_dm816_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = AHCI_DM816_DRV_NAME, .of_match_table = ahci_dm816_of_match, diff --git a/drivers/ata/ahci_dwc.c b/drivers/ata/ahci_dwc.c index 4bfbb09cdc02..9604a2f6ed48 100644 --- a/drivers/ata/ahci_dwc.c +++ b/drivers/ata/ahci_dwc.c @@ -478,7 +478,7 @@ MODULE_DEVICE_TABLE(of, ahci_dwc_of_match); static struct platform_driver ahci_dwc_driver = { .probe = 
ahci_dwc_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .shutdown = ahci_platform_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 3a8c248e7c0e..9fa005965f3b 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -1223,7 +1223,7 @@ static SIMPLE_DEV_PM_OPS(ahci_imx_pm_ops, imx_ahci_suspend, imx_ahci_resume); static struct platform_driver imx_ahci_driver = { .probe = imx_ahci_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = imx_ahci_of_match, diff --git a/drivers/ata/ahci_mtk.c b/drivers/ata/ahci_mtk.c index 0bf83a297091..5083fb6c4927 100644 --- a/drivers/ata/ahci_mtk.c +++ b/drivers/ata/ahci_mtk.c @@ -173,7 +173,7 @@ MODULE_DEVICE_TABLE(of, ahci_of_match); static struct platform_driver mtk_ahci_driver = { .probe = mtk_ahci_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_of_match, diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index 596cf017f427..764501518582 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -245,7 +245,7 @@ MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match); static struct platform_driver ahci_mvebu_driver = { .probe = ahci_mvebu_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .suspend = ahci_mvebu_suspend, .resume = ahci_mvebu_resume, .driver = { diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 299ee686ac49..ab30c7138d73 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -96,7 +96,7 @@ MODULE_DEVICE_TABLE(acpi, ahci_acpi_match); static struct platform_driver ahci_driver = { .probe = ahci_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .shutdown = ahci_platform_shutdown, .driver = { .name = DRV_NAME, diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index 0ba764d283c8..3d01b118c9a1 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -359,7 +359,7 @@ static SIMPLE_DEV_PM_OPS(ahci_qoriq_pm_ops, ahci_platform_suspend, static struct platform_driver ahci_qoriq_driver = { .probe = ahci_qoriq_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_qoriq_of_match, diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c index 9eda7bbd2151..2c32d58c6ae7 100644 --- a/drivers/ata/ahci_seattle.c +++ b/drivers/ata/ahci_seattle.c @@ -187,7 +187,7 @@ MODULE_DEVICE_TABLE(acpi, ahci_acpi_match); static struct platform_driver ahci_seattle_driver = { .probe = ahci_seattle_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .acpi_match_table = ahci_acpi_match, diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index f2c1edb36986..d4a626f87963 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -239,7 +239,7 @@ static struct platform_driver st_ahci_driver = { .of_match_table = st_ahci_match, }, .probe = st_ahci_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, }; module_platform_driver(st_ahci_driver); diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c index 076c12b4ba08..04531fa95e40 100644 --- a/drivers/ata/ahci_sunxi.c +++ b/drivers/ata/ahci_sunxi.c @@ -292,7 +292,7 @@ MODULE_DEVICE_TABLE(of, 
ahci_sunxi_of_match); static struct platform_driver ahci_sunxi_driver = { .probe = ahci_sunxi_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_sunxi_of_match, diff --git a/drivers/ata/ahci_tegra.c b/drivers/ata/ahci_tegra.c index 8e5e2b359f2d..21c20793e517 100644 --- a/drivers/ata/ahci_tegra.c +++ b/drivers/ata/ahci_tegra.c @@ -609,7 +609,7 @@ deinit_controller: static struct platform_driver tegra_ahci_driver = { .probe = tegra_ahci_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = tegra_ahci_of_match, diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index 83f5ff54ef5b..eb773f2e28fc 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -868,7 +868,7 @@ disable_resources: static struct platform_driver xgene_ahci_driver = { .probe = xgene_ahci_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = xgene_ahci_of_match, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8bf612bdd61a..e9fc69fbe06b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6141,13 +6141,11 @@ EXPORT_SYMBOL_GPL(ata_pci_device_resume); * LOCKING: * Inherited from platform layer (may sleep). */ -int ata_platform_remove_one(struct platform_device *pdev) +void ata_platform_remove_one(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); ata_host_detach(host); - - return 0; } EXPORT_SYMBOL_GPL(ata_platform_remove_one); diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c index 99a2ce723495..b1daa4d3fcd9 100644 --- a/drivers/ata/pata_ixp4xx_cf.c +++ b/drivers/ata/pata_ixp4xx_cf.c @@ -303,7 +303,7 @@ static struct platform_driver ixp4xx_pata_platform_driver = { .of_match_table = ixp4xx_pata_of_match, }, .probe = ixp4xx_pata_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, }; module_platform_driver(ixp4xx_pata_platform_driver); diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c index 178b28eff170..4956f0f5b93f 100644 --- a/drivers/ata/pata_of_platform.c +++ b/drivers/ata/pata_of_platform.c @@ -89,7 +89,7 @@ static struct platform_driver pata_of_platform_driver = { .of_match_table = pata_of_platform_match, }, .probe = pata_of_platform_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, }; module_platform_driver(pata_of_platform_driver); diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 87479bc893b2..232c3dad7ee8 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -223,7 +223,7 @@ static int pata_platform_probe(struct platform_device *pdev) static struct platform_driver pata_platform_driver = { .probe = pata_platform_probe, - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = DRV_NAME, }, diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index 8237ece4a46f..d6b324d03e59 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -614,7 +614,7 @@ static SIMPLE_DEV_PM_OPS(ahci_highbank_pm_ops, ahci_highbank_suspend, ahci_highbank_resume); static struct platform_driver ahci_highbank_driver = { - .remove = ata_platform_remove_one, + .remove_new = ata_platform_remove_one, .driver = { .name = "highbank-ahci", .of_match_table = 
ahci_of_match, diff --git a/include/linux/libata.h b/include/linux/libata.h index 311cd93377c7..01f9fbb69f89 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1276,7 +1276,7 @@ extern int ata_pci_device_resume(struct pci_dev *pdev); struct platform_device; -extern int ata_platform_remove_one(struct platform_device *pdev); +extern void ata_platform_remove_one(struct platform_device *pdev); /* * ACPI - drivers/ata/libata-acpi.c -- cgit v1.2.3 From 38f1755a3e59a3f88e33030f8e4ee0421de2f05a Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Fri, 12 May 2023 00:46:25 +0800 Subject: fs: use correct __poll_t type Fix the following sparse warnings by using __poll_t instead of unsigned type. fs/eventpoll.c:541:9: sparse: warning: restricted __poll_t degrades to integer fs/eventfd.c:67:17: sparse: warning: restricted __poll_t degrades to integer Signed-off-by: Min-Hua Chen Message-Id: <20230511164628.336586-1-minhuadotchen@gmail.com> Signed-off-by: Christian Brauner --- fs/eventfd.c | 2 +- fs/eventpoll.c | 2 +- include/linux/eventfd.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/eventfd.c b/fs/eventfd.c index 95850a13ce8d..6c06a527747f 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -43,7 +43,7 @@ struct eventfd_ctx { int id; }; -__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask) +__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask) { unsigned long flags; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 980483455cc0..e0eabaae7402 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -536,7 +536,7 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, #else static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi, - unsigned pollflags) + __poll_t pollflags) { wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags); } diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 36a486505b08..98d31cdaca40 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -40,7 +40,7 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); -__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask); +__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, __poll_t mask); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); -- cgit v1.2.3 From 6f0621238b7e7680d5e26c00aa4cd473314d05b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:27 +0200 Subject: cpu/hotplug: Add CPU state tracking and synchronization The CPU state tracking and synchronization mechanism in smpboot.c is completely independent of the hotplug code and all logic around it is implemented in architecture specific code. Except for the state reporting of the AP there is absolutely nothing architecture specific and the sychronization and decision functions can be moved into the generic hotplug core code. Provide an integrated variant and add the core synchronization and decision points. This comes in two flavours: 1) DEAD state synchronization Updated by the architecture code once the AP reaches the point where it is ready to be torn down by the control CPU, e.g. by removing power or clocks or tear down via the hypervisor. The control CPU waits for this state to be reached with a timeout. 
If the state is reached an architecture specific cleanup function is invoked. 2) Full state synchronization This extends #1 with AP alive synchronization. This is new functionality, which allows to replace architecture specific wait mechanims, e.g. cpumasks, completely. It also prevents that an AP which is in a limbo state can be brought up again. This can happen when an AP failed to report dead state during a previous off-line operation. The dead synchronization is what most architectures use. Only x86 makes a bringup decision based on that state at the moment. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.476305035@linutronix.de --- arch/Kconfig | 15 ++++ include/linux/cpuhotplug.h | 12 +++ kernel/cpu.c | 193 ++++++++++++++++++++++++++++++++++++++++++++- kernel/smpboot.c | 2 + 4 files changed, 221 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 205fd23e0cad..f55c5fcbea38 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -34,6 +34,21 @@ config ARCH_HAS_SUBPAGE_FAULTS config HOTPLUG_SMT bool +# Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL +config HOTPLUG_CORE_SYNC + bool + +# Basic CPU dead synchronization selected by architecture +config HOTPLUG_CORE_SYNC_DEAD + bool + select HOTPLUG_CORE_SYNC + +# Full CPU synchronization with alive state selected by architecture +config HOTPLUG_CORE_SYNC_FULL + bool + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU + select HOTPLUG_CORE_SYNC + config GENERIC_ENTRY bool diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 0f1001dca0e0..5def71f81ec5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -517,4 +517,16 @@ void cpuhp_online_idle(enum cpuhp_state state); static inline void cpuhp_online_idle(enum cpuhp_state state) { } #endif +void cpuhp_ap_sync_alive(void); +void arch_cpuhp_sync_state_poll(void); +void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +void cpuhp_ap_report_dead(void); +void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu); +#else +static inline void cpuhp_ap_report_dead(void) { } +static inline void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } +#endif + #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index df8f137f0271..64b624291316 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,7 @@ * @last: For multi-instance rollback, remember how far we got * @cb_state: The state for a single callback (install/uninstall) * @result: Result of the operation + * @ap_sync_state: State for AP synchronization * @done_up: Signal completion to the issuer of the task for cpu-up * @done_down: Signal completion to the issuer of the task for cpu-down */ @@ -76,6 +78,7 @@ struct cpuhp_cpu_state { struct hlist_node *last; enum cpuhp_state cb_state; int result; + atomic_t ap_sync_state; struct completion done_up; struct completion done_down; #endif @@ -276,6 +279,182 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state) return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; } +/* Synchronization state management */ +enum cpuhp_sync_state { + SYNC_STATE_DEAD, + SYNC_STATE_KICKED, + SYNC_STATE_SHOULD_DIE, + SYNC_STATE_ALIVE, + SYNC_STATE_SHOULD_ONLINE, + SYNC_STATE_ONLINE, +}; + +#ifdef 
CONFIG_HOTPLUG_CORE_SYNC +/** + * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown + * @state: The synchronization state to set + * + * No synchronization point. Just update of the synchronization state, but implies + * a full barrier so that the AP changes are visible before the control CPU proceeds. + */ +static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) +{ + atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); + + (void)atomic_xchg(st, state); +} + +void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); } + +static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state, + enum cpuhp_sync_state next_state) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + ktime_t now, end, start = ktime_get(); + int sync; + + end = start + 10ULL * NSEC_PER_SEC; + + sync = atomic_read(st); + while (1) { + if (sync == state) { + if (!atomic_try_cmpxchg(st, &sync, next_state)) + continue; + return true; + } + + now = ktime_get(); + if (now > end) { + /* Timeout. Leave the state unchanged */ + return false; + } else if (now - start < NSEC_PER_MSEC) { + /* Poll for one millisecond */ + arch_cpuhp_sync_state_poll(); + } else { + usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE); + } + sync = atomic_read(st); + } + return true; +} +#else /* CONFIG_HOTPLUG_CORE_SYNC */ +static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC */ + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD +/** + * cpuhp_ap_report_dead - Update synchronization state to DEAD + * + * No synchronization point. Just update of the synchronization state. + */ +void cpuhp_ap_report_dead(void) +{ + cpuhp_ap_update_sync_state(SYNC_STATE_DEAD); +} + +void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } + +/* + * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down + * because the AP cannot issue complete() at this stage. + */ +static void cpuhp_bp_sync_dead(unsigned int cpu) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + int sync = atomic_read(st); + + do { + /* CPU can have reported dead already. Don't overwrite that! */ + if (sync == SYNC_STATE_DEAD) + break; + } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE)); + + if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) { + /* CPU reached dead state. Invoke the cleanup function */ + arch_cpuhp_cleanup_dead_cpu(cpu); + return; + } + + /* No further action possible. Emit message and give up. */ + pr_err("CPU%u failed to report dead state\n", cpu); +} +#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */ +static inline void cpuhp_bp_sync_dead(unsigned int cpu) { } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */ + +#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL +/** + * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive + * + * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits + * for the BP to release it. + */ +void cpuhp_ap_sync_alive(void) +{ + atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); + + cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE); + + /* Wait for the control CPU to release it. 
*/ + while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE) + cpu_relax(); +} + +static bool cpuhp_can_boot_ap(unsigned int cpu) +{ + atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); + int sync = atomic_read(st); + +again: + switch (sync) { + case SYNC_STATE_DEAD: + /* CPU is properly dead */ + break; + case SYNC_STATE_KICKED: + /* CPU did not come up in previous attempt */ + break; + case SYNC_STATE_ALIVE: + /* CPU is stuck cpuhp_ap_sync_alive(). */ + break; + default: + /* CPU failed to report online or dead and is in limbo state. */ + return false; + } + + /* Prepare for booting */ + if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED)) + goto again; + + return true; +} + +void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { } + +/* + * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up + * because the AP cannot issue complete() so early in the bringup. + */ +static int cpuhp_bp_sync_alive(unsigned int cpu) +{ + int ret = 0; + + if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL)) + return 0; + + if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) { + pr_err("CPU%u failed to report alive state\n", cpu); + ret = -EIO; + } + + /* Let the architecture cleanup the kick alive mechanics. */ + arch_cpuhp_cleanup_kick_cpu(cpu); + return ret; +} +#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */ +static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; } +static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; } +#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */ + /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); bool cpuhp_tasks_frozen; @@ -588,6 +767,9 @@ static int bringup_cpu(unsigned int cpu) struct task_struct *idle = idle_thread_get(cpu); int ret; + if (!cpuhp_can_boot_ap(cpu)) + return -EAGAIN; + /* * Reset stale stack state from the last time this CPU was online. */ @@ -610,6 +792,10 @@ static int bringup_cpu(unsigned int cpu) if (ret) goto out_unlock; + ret = cpuhp_bp_sync_alive(cpu); + if (ret) + goto out_unlock; + ret = bringup_wait_for_ap_online(cpu); if (ret) goto out_unlock; @@ -1113,6 +1299,8 @@ static int takedown_cpu(unsigned int cpu) /* This actually kills the CPU. */ __cpu_die(cpu); + cpuhp_bp_sync_dead(cpu); + tick_cleanup_dead_cpu(cpu); rcutree_migrate_callbacks(cpu); return 0; @@ -1359,8 +1547,10 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE); + /* - * Unpart the stopper thread before we start the idle loop (and start + * Unpark the stopper thread before we start the idle loop (and start * scheduling); this ensures the stopper task is always available. 
*/ stop_machine_unpark(smp_processor_id()); @@ -2737,6 +2927,7 @@ void __init boot_cpu_hotplug_init(void) { #ifdef CONFIG_SMP cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask); + atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE); #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 2c7396da470c..3dcfd3f04ed0 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -326,6 +326,7 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) } EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); +#ifndef CONFIG_HOTPLUG_CORE_SYNC static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); /* @@ -488,3 +489,4 @@ bool cpu_report_death(void) } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ +#endif /* !CONFIG_HOTPLUG_CORE_SYNC */ -- cgit v1.2.3 From 5356297d12d9ee6f70d09485878904bc41bac422 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:30 +0200 Subject: cpu/hotplug: Remove cpu_report_state() and related unused cruft No more users. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.582584351@linutronix.de --- include/linux/cpu.h | 2 -- kernel/smpboot.c | 90 ----------------------------------------------------- 2 files changed, 92 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8582a7142623..68f69e8e4f19 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -184,8 +184,6 @@ void arch_cpu_idle_enter(void); void arch_cpu_idle_exit(void); void __noreturn arch_cpu_idle_dead(void); -int cpu_report_state(int cpu); -int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); void play_idle_precise(u64 duration_ns, u64 latency_ns); diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 3dcfd3f04ed0..1940f33a40a3 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -329,97 +329,7 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); #ifndef CONFIG_HOTPLUG_CORE_SYNC static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); -/* - * Called to poll specified CPU's state, for example, when waiting for - * a CPU to come online. - */ -int cpu_report_state(int cpu) -{ - return atomic_read(&per_cpu(cpu_hotplug_state, cpu)); -} - -/* - * If CPU has died properly, set its state to CPU_UP_PREPARE and - * return success. Otherwise, return -EBUSY if the CPU died after - * cpu_wait_death() timed out. And yet otherwise again, return -EAGAIN - * if cpu_wait_death() timed out and the CPU still hasn't gotten around - * to dying. In the latter two cases, the CPU might not be set up - * properly, but it is up to the arch-specific code to decide. - * Finally, -EIO indicates an unanticipated problem. - * - * Note that it is permissible to omit this call entirely, as is - * done in architectures that do no CPU-hotplug error checking. - */ -int cpu_check_up_prepare(int cpu) -{ - if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) { - atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); - return 0; - } - - switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) { - - case CPU_POST_DEAD: - - /* The CPU died properly, so just start it up again. 
*/ - atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE); - return 0; - - case CPU_DEAD_FROZEN: - - /* - * Timeout during CPU death, so let caller know. - * The outgoing CPU completed its processing, but after - * cpu_wait_death() timed out and reported the error. The - * caller is free to proceed, in which case the state - * will be reset properly by cpu_set_state_online(). - * Proceeding despite this -EBUSY return makes sense - * for systems where the outgoing CPUs take themselves - * offline, with no post-death manipulation required from - * a surviving CPU. - */ - return -EBUSY; - - case CPU_BROKEN: - - /* - * The most likely reason we got here is that there was - * a timeout during CPU death, and the outgoing CPU never - * did complete its processing. This could happen on - * a virtualized system if the outgoing VCPU gets preempted - * for more than five seconds, and the user attempts to - * immediately online that same CPU. Trying again later - * might return -EBUSY above, hence -EAGAIN. - */ - return -EAGAIN; - - case CPU_UP_PREPARE: - /* - * Timeout while waiting for the CPU to show up. Allow to try - * again later. - */ - return 0; - - default: - - /* Should not happen. Famous last words. */ - return -EIO; - } -} - -/* - * Mark the specified CPU online. - * - * Note that it is permissible to omit this call entirely, as is - * done in architectures that do no CPU-hotplug error checking. - */ -void cpu_set_state_online(int cpu) -{ - (void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE); -} - #ifdef CONFIG_HOTPLUG_CPU - /* * Wait for the specified CPU to exit the idle loop and die. */ -- cgit v1.2.3 From bc088f9a0d5bdf12bb18980739336dfcc092e55b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:41 +0200 Subject: cpu/hotplug: Remove unused state functions All users converted to the hotplug core mechanism. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205256.972894276@linutronix.de --- include/linux/cpu.h | 2 -- kernel/smpboot.c | 75 ----------------------------------------------------- 2 files changed, 77 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 68f69e8e4f19..d321dbd53405 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -193,8 +193,6 @@ static inline void play_idle(unsigned long duration_us) } #ifdef CONFIG_HOTPLUG_CPU -bool cpu_wait_death(unsigned int cpu, int seconds); -bool cpu_report_death(void); void cpuhp_report_idle_dead(void); #else static inline void cpuhp_report_idle_dead(void) { } diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 1940f33a40a3..f47d8f375946 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -325,78 +325,3 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread) cpus_read_unlock(); } EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread); - -#ifndef CONFIG_HOTPLUG_CORE_SYNC -static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD); - -#ifdef CONFIG_HOTPLUG_CPU -/* - * Wait for the specified CPU to exit the idle loop and die. - */ -bool cpu_wait_death(unsigned int cpu, int seconds) -{ - int jf_left = seconds * HZ; - int oldstate; - bool ret = true; - int sleep_jf = 1; - - might_sleep(); - - /* The outgoing CPU will normally get done quite quickly. 
*/ - if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD) - goto update_state_early; - udelay(5); - - /* But if the outgoing CPU dawdles, wait increasingly long times. */ - while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) { - schedule_timeout_uninterruptible(sleep_jf); - jf_left -= sleep_jf; - if (jf_left <= 0) - break; - sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10); - } -update_state_early: - oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); -update_state: - if (oldstate == CPU_DEAD) { - /* Outgoing CPU died normally, update state. */ - smp_mb(); /* atomic_read() before update. */ - atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD); - } else { - /* Outgoing CPU still hasn't died, set state accordingly. */ - if (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), - &oldstate, CPU_BROKEN)) - goto update_state; - ret = false; - } - return ret; -} - -/* - * Called by the outgoing CPU to report its successful death. Return - * false if this report follows the surviving CPU's timing out. - * - * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU - * timed out. This approach allows architectures to omit calls to - * cpu_check_up_prepare() and cpu_set_state_online() without defeating - * the next cpu_wait_death()'s polling loop. - */ -bool cpu_report_death(void) -{ - int oldstate; - int newstate; - int cpu = smp_processor_id(); - - oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu)); - do { - if (oldstate != CPU_BROKEN) - newstate = CPU_DEAD; - else - newstate = CPU_DEAD_FROZEN; - } while (!atomic_try_cmpxchg(&per_cpu(cpu_hotplug_state, cpu), - &oldstate, newstate)); - return newstate == CPU_DEAD; -} - -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ -#endif /* !CONFIG_HOTPLUG_CORE_SYNC */ -- cgit v1.2.3 From a631be92b996c5db9b368e8b96305d22fb8c4180 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:45 +0200 Subject: cpu/hotplug: Provide a split up CPUHP_BRINGUP mechanism The bring up logic of a to be onlined CPU consists of several parts, which are considered to be a single hotplug state: 1) Control CPU issues the wake-up 2) To be onlined CPU starts up, does the minimal initialization, reports to be alive and waits for release into the complete bring-up. 3) Control CPU waits for the alive report and releases the upcoming CPU for the complete bring-up. Allow to split this into two states: 1) Control CPU issues the wake-up After that the to be onlined CPU starts up, does the minimal initialization, reports to be alive and waits for release into the full bring-up. As this can run after the control CPU dropped the hotplug locks the code which is executed on the AP before it reports alive has to be carefully audited to not violate any of the hotplug constraints, especially not modifying any of the various cpumasks. This is really only meant to avoid waiting for the AP to react on the wake-up. Of course an architecture can move strict CPU related setup functionality, e.g. microcode loading, with care before the synchronization point to save further pointless waiting time. 2) Control CPU waits for the alive report and releases the upcoming CPU for the complete bring-up. This allows that the two states can be split up to run all to be onlined CPUs up to state #1 on the control CPU and then at a later point run state #2. This spares some of the latencies of the full serialized per CPU bringup by avoiding the per CPU wakeup/wait serialization. The assumption is that the first AP already waits when the last AP has been woken up. 
This obvioulsy depends on the hardware latencies and depending on the timings this might still not completely eliminate all wait scenarios. This split is just a preparatory step for enabling the parallel bringup later. The boot time bringup is still fully serialized. It has a separate config switch so that architectures which want to support parallel bringup can test the split of the CPUHP_BRINGUG step separately. To enable this the architecture must support the CPU hotplug core sync mechanism and has to be audited that there are no implicit hotplug state dependencies which require a fully serialized bringup. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.080801387@linutronix.de --- arch/Kconfig | 4 +++ include/linux/cpuhotplug.h | 4 +++ kernel/cpu.c | 70 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index f55c5fcbea38..d3015a61c148 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -49,6 +49,10 @@ config HOTPLUG_CORE_SYNC_FULL select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select HOTPLUG_CORE_SYNC +config HOTPLUG_SPLIT_STARTUP + bool + select HOTPLUG_CORE_SYNC_FULL + config GENERIC_ENTRY bool diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5def71f81ec5..bc2d0a1d7608 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -133,6 +133,7 @@ enum cpuhp_state { CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, + CPUHP_BP_KICK_AP, CPUHP_BRINGUP_CPU, /* @@ -517,9 +518,12 @@ void cpuhp_online_idle(enum cpuhp_state state); static inline void cpuhp_online_idle(enum cpuhp_state state) { } #endif +struct task_struct; + void cpuhp_ap_sync_alive(void); void arch_cpuhp_sync_state_poll(void); void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu); +int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle); #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD void cpuhp_ap_report_dead(void); diff --git a/kernel/cpu.c b/kernel/cpu.c index 0ab6a7d430c6..d2487aa4e7c8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -761,6 +761,47 @@ static int bringup_wait_for_ap_online(unsigned int cpu) return 0; } +#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP +static int cpuhp_kick_ap_alive(unsigned int cpu) +{ + if (!cpuhp_can_boot_ap(cpu)) + return -EAGAIN; + + return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu)); +} + +static int cpuhp_bringup_ap(unsigned int cpu) +{ + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + int ret; + + /* + * Some architectures have to walk the irq descriptors to + * setup the vector space for the cpu which comes online. + * Prevent irq alloc/free across the bringup. + */ + irq_lock_sparse(); + + ret = cpuhp_bp_sync_alive(cpu); + if (ret) + goto out_unlock; + + ret = bringup_wait_for_ap_online(cpu); + if (ret) + goto out_unlock; + + irq_unlock_sparse(); + + if (st->target <= CPUHP_AP_ONLINE_IDLE) + return 0; + + return cpuhp_kick_ap(cpu, st, st->target); + +out_unlock: + irq_unlock_sparse(); + return ret; +} +#else static int bringup_cpu(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); @@ -781,7 +822,6 @@ static int bringup_cpu(unsigned int cpu) */ irq_lock_sparse(); - /* Arch-specific enabling code. 
*/ ret = __cpu_up(cpu, idle); if (ret) goto out_unlock; @@ -805,6 +845,7 @@ out_unlock: irq_unlock_sparse(); return ret; } +#endif static int finish_cpu(unsigned int cpu) { @@ -1944,13 +1985,38 @@ static struct cpuhp_step cpuhp_hp_states[] = { .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, - /* Kicks the plugged cpu into life */ + +#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP + /* + * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until + * the next step will release it. + */ + [CPUHP_BP_KICK_AP] = { + .name = "cpu:kick_ap", + .startup.single = cpuhp_kick_ap_alive, + }, + + /* + * Waits for the AP to reach cpuhp_ap_sync_alive() and then + * releases it for the complete bringup. + */ + [CPUHP_BRINGUP_CPU] = { + .name = "cpu:bringup", + .startup.single = cpuhp_bringup_ap, + .teardown.single = finish_cpu, + .cant_stop = true, + }, +#else + /* + * All-in-one CPU bringup state which includes the kick alive. + */ [CPUHP_BRINGUP_CPU] = { .name = "cpu:bringup", .startup.single = bringup_cpu, .teardown.single = finish_cpu, .cant_stop = true, }, +#endif /* Final state before CPU kills itself */ [CPUHP_AP_IDLE_DEAD] = { .name = "idle:dead", -- cgit v1.2.3 From 18415f33e2ac4ab382cbca8b5ff82a9036b5bd49 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 May 2023 23:07:50 +0200 Subject: cpu/hotplug: Allow "parallel" bringup up to CPUHP_BP_KICK_AP_STATE There is often significant latency in the early stages of CPU bringup, and time is wasted by waking each CPU (e.g. with SIPI/INIT/INIT on x86) and then waiting for it to respond before moving on to the next. Allow a platform to enable parallel setup which brings all to be onlined CPUs up to the CPUHP_BP_KICK_AP state. While this state advancement on the control CPU (BP) is single-threaded the important part is the last state CPUHP_BP_KICK_AP which wakes the to be onlined CPUs up. This allows the CPUs to run up to the first sychronization point cpuhp_ap_sync_alive() where they wait for the control CPU to release them one by one for the full onlining procedure. This parallelism depends on the CPU hotplug core sync mechanism which ensures that the parallel brought up CPUs wait for release before touching any state which would make the CPU visible to anything outside the hotplug control mechanism. To handle the SMT constraints of X86 correctly the bringup happens in two iterations when CONFIG_HOTPLUG_SMT is enabled. The control CPU brings up the primary SMT threads of each core first, which can load the microcode without the need to rendevouz with the thread siblings. Once that's completed it brings up the secondary SMT threads. Co-developed-by: David Woodhouse Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Tested-by: Oleksandr Natalenko Tested-by: Helge Deller # parisc Tested-by: Guilherme G. 
Piccoli # Steam Deck Link: https://lore.kernel.org/r/20230512205257.240231377@linutronix.de --- Documentation/admin-guide/kernel-parameters.txt | 6 ++ arch/Kconfig | 4 + include/linux/cpuhotplug.h | 1 + kernel/cpu.c | 103 ++++++++++++++++++++++-- 4 files changed, 109 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ccb91a255c6d..79fb1248f0ce 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -838,6 +838,12 @@ on every CPU online, such as boot, and resume from suspend. Default: 10000 + cpuhp.parallel= + [SMP] Enable/disable parallel bringup of secondary CPUs + Format: + Default is enabled if CONFIG_HOTPLUG_PARALLEL=y. Otherwise + the parameter has no effect. + crash_kexec_post_notifiers Run kdump after running panic-notifiers and dumping kmsg. This only for the users who doubt kdump always diff --git a/arch/Kconfig b/arch/Kconfig index d3015a61c148..64d771855ecd 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -53,6 +53,10 @@ config HOTPLUG_SPLIT_STARTUP bool select HOTPLUG_CORE_SYNC_FULL +config HOTPLUG_PARALLEL + bool + select HOTPLUG_SPLIT_STARTUP + config GENERIC_ENTRY bool diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index bc2d0a1d7608..a5e414cd82be 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -524,6 +524,7 @@ void cpuhp_ap_sync_alive(void); void arch_cpuhp_sync_state_poll(void); void arch_cpuhp_cleanup_kick_cpu(unsigned int cpu); int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle); +bool arch_cpuhp_init_parallel_bringup(void); #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD void cpuhp_ap_report_dead(void); diff --git a/kernel/cpu.c b/kernel/cpu.c index d2487aa4e7c8..005f863a3d2b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -649,8 +649,23 @@ bool cpu_smt_possible(void) cpu_smt_control != CPU_SMT_NOT_SUPPORTED; } EXPORT_SYMBOL_GPL(cpu_smt_possible); + +static inline bool cpuhp_smt_aware(void) +{ + return topology_smt_supported(); +} + +static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) +{ + return cpu_primary_thread_mask; +} #else static inline bool cpu_smt_allowed(unsigned int cpu) { return true; } +static inline bool cpuhp_smt_aware(void) { return false; } +static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) +{ + return cpu_present_mask; +} #endif static inline enum cpuhp_state @@ -1747,18 +1762,96 @@ int bringup_hibernate_cpu(unsigned int sleep_cpu) return 0; } -void __init bringup_nonboot_cpus(unsigned int setup_max_cpus) +static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus, + enum cpuhp_state target) { unsigned int cpu; - for_each_present_cpu(cpu) { - if (num_online_cpus() >= setup_max_cpus) + for_each_cpu(cpu, mask) { + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + + if (!--ncpus) break; - if (!cpu_online(cpu)) - cpu_up(cpu, CPUHP_ONLINE); + + if (cpu_up(cpu, target) && can_rollback_cpu(st)) { + /* + * If this failed then cpu_up() might have only + * rolled back to CPUHP_BP_KICK_AP for the final + * online. Clean it up. NOOP if already rolled back. 
+ */ + WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE)); + } } } +#ifdef CONFIG_HOTPLUG_PARALLEL +static bool __cpuhp_parallel_bringup __ro_after_init = true; + +static int __init parallel_bringup_parse_param(char *arg) +{ + return kstrtobool(arg, &__cpuhp_parallel_bringup); +} +early_param("cpuhp.parallel", parallel_bringup_parse_param); + +/* + * On architectures which have enabled parallel bringup this invokes all BP + * prepare states for each of the to be onlined APs first. The last state + * sends the startup IPI to the APs. The APs proceed through the low level + * bringup code in parallel and then wait for the control CPU to release + * them one by one for the final onlining procedure. + * + * This avoids waiting for each AP to respond to the startup IPI in + * CPUHP_BRINGUP_CPU. + */ +static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus) +{ + const struct cpumask *mask = cpu_present_mask; + + if (__cpuhp_parallel_bringup) + __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup(); + if (!__cpuhp_parallel_bringup) + return false; + + if (cpuhp_smt_aware()) { + const struct cpumask *pmask = cpuhp_get_primary_thread_mask(); + static struct cpumask tmp_mask __initdata; + + /* + * X86 requires to prevent that SMT siblings stopped while + * the primary thread does a microcode update for various + * reasons. Bring the primary threads up first. + */ + cpumask_and(&tmp_mask, mask, pmask); + cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP); + cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE); + /* Account for the online CPUs */ + ncpus -= num_online_cpus(); + if (!ncpus) + return true; + /* Create the mask for secondary CPUs */ + cpumask_andnot(&tmp_mask, mask, pmask); + mask = &tmp_mask; + } + + /* Bring the not-yet started CPUs up */ + cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP); + cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE); + return true; +} +#else +static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; } +#endif /* CONFIG_HOTPLUG_PARALLEL */ + +void __init bringup_nonboot_cpus(unsigned int setup_max_cpus) +{ + /* Try parallel bringup optimization if enabled */ + if (cpuhp_bringup_cpus_parallel(setup_max_cpus)) + return; + + /* Full per CPU serialized bringup */ + cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE); +} + #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; -- cgit v1.2.3 From 049449976f549605a6913d468b61356a9950a6a2 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 May 2023 19:36:08 +0200 Subject: mfd: rk808: Replace 'struct i2c_client' with 'struct device' Put 'struct device' pointer into the MFD platform_data instead of the 'struct i2c_client' pointer. This simplifies the code and prepares the MFD for SPI support. 
Tested-by: Diederik de Haas # Rock64, Quartz64 Model A + B Tested-by: Vincent Legoll # Pine64 QuartzPro64 Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230504173618.142075-5-sebastian.reichel@collabora.com Signed-off-by: Lee Jones --- drivers/mfd/rk808.c | 6 +++--- include/linux/mfd/rk808.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c index f42e09e3a3f5..ce52307cbaea 100644 --- a/drivers/mfd/rk808.c +++ b/drivers/mfd/rk808.c @@ -577,7 +577,7 @@ static int rk808_power_off(struct sys_off_data *data) } ret = regmap_update_bits(rk808->regmap, reg, bit, bit); if (ret) - dev_err(&rk808->i2c->dev, "Failed to shutdown device!\n"); + dev_err(rk808->dev, "Failed to shutdown device!\n"); return NOTIFY_DONE; } @@ -600,7 +600,7 @@ static int rk808_restart(struct sys_off_data *data) } ret = regmap_update_bits(rk808->regmap, reg, bit, bit); if (ret) - dev_err(&rk808->i2c->dev, "Failed to restart device!\n"); + dev_err(rk808->dev, "Failed to restart device!\n"); return NOTIFY_DONE; } @@ -720,7 +720,7 @@ static int rk808_probe(struct i2c_client *client) return -EINVAL; } - rk808->i2c = client; + rk808->dev = &client->dev; i2c_set_clientdata(client, rk808); rk808->regmap = devm_regmap_init_i2c(client, rk808->regmap_cfg); diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 9af1f3105f80..a89ddd9ba68e 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -787,7 +787,7 @@ enum { }; struct rk808 { - struct i2c_client *i2c; + struct device *dev; struct regmap_irq_chip_data *irq_data; struct regmap *regmap; long variant; -- cgit v1.2.3 From c20e8c5b1203af3726561ee5649b147194e0618e Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 May 2023 19:36:09 +0200 Subject: mfd: rk808: Split into core and i2c Split rk808 into a core and an i2c part in preparation for SPI support. Acked-by: Alexandre Belloni # for RTC Tested-by: Diederik de Haas # Rock64, Quartz64 Model A + B Tested-by: Vincent Legoll # Pine64 QuartzPro64 Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230504173618.142075-6-sebastian.reichel@collabora.com Signed-off-by: Lee Jones --- drivers/clk/Kconfig | 2 +- drivers/input/misc/Kconfig | 2 +- drivers/mfd/Kconfig | 7 +- drivers/mfd/Makefile | 3 +- drivers/mfd/rk808.c | 845 ------------------------------------------- drivers/mfd/rk8xx-core.c | 706 ++++++++++++++++++++++++++++++++++++ drivers/mfd/rk8xx-i2c.c | 200 ++++++++++ drivers/pinctrl/Kconfig | 2 +- drivers/power/supply/Kconfig | 2 +- drivers/regulator/Kconfig | 2 +- drivers/rtc/Kconfig | 2 +- include/linux/mfd/rk808.h | 6 + sound/soc/codecs/Kconfig | 2 +- 13 files changed, 927 insertions(+), 854 deletions(-) delete mode 100644 drivers/mfd/rk808.c create mode 100644 drivers/mfd/rk8xx-core.c create mode 100644 drivers/mfd/rk8xx-i2c.c (limited to 'include/linux') diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 016814e15536..c0c8e526a1e9 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -82,7 +82,7 @@ config COMMON_CLK_MAX9485 config COMMON_CLK_RK808 tristate "Clock driver for RK805/RK808/RK809/RK817/RK818" - depends on MFD_RK808 + depends on MFD_RK8XX help This driver supports RK805, RK809 and RK817, RK808 and RK818 crystal oscillator clock. These multi-function devices have two fixed-rate oscillators, clocked at 32KHz each. 
diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 81a54a59e13c..8a320e6218e3 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -609,7 +609,7 @@ config INPUT_PWM_VIBRA config INPUT_RK805_PWRKEY tristate "Rockchip RK805 PMIC power key support" - depends on MFD_RK808 + depends on MFD_RK8XX help Select this option to enable power key driver for RK805. diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index e90463c4441c..de53e6c701fd 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1183,12 +1183,17 @@ config MFD_RC5T583 Additional drivers must be enabled in order to use the different functionality of the device. -config MFD_RK808 +config MFD_RK8XX + bool + select MFD_CORE + +config MFD_RK8XX_I2C tristate "Rockchip RK805/RK808/RK809/RK817/RK818 Power Management Chip" depends on I2C && OF select MFD_CORE select REGMAP_I2C select REGMAP_IRQ + select MFD_RK8XX help If you say yes here you get support for the RK805, RK808, RK809, RK817 and RK818 Power Management chips. diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index 1d2392f06f78..ba373193e999 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -214,7 +214,8 @@ obj-$(CONFIG_MFD_PALMAS) += palmas.o obj-$(CONFIG_MFD_VIPERBOARD) += viperboard.o obj-$(CONFIG_MFD_NTXEC) += ntxec.o obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o -obj-$(CONFIG_MFD_RK808) += rk808.o +obj-$(CONFIG_MFD_RK8XX) += rk8xx-core.o +obj-$(CONFIG_MFD_RK8XX_I2C) += rk8xx-i2c.o obj-$(CONFIG_MFD_RN5T618) += rn5t618.o obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o obj-$(CONFIG_MFD_SYSCON) += syscon.o diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c deleted file mode 100644 index ce52307cbaea..000000000000 --- a/drivers/mfd/rk808.c +++ /dev/null @@ -1,845 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * MFD core driver for Rockchip RK808/RK818 - * - * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd - * - * Author: Chris Zhong - * Author: Zhang Qing - * - * Copyright (C) 2016 PHYTEC Messtechnik GmbH - * - * Author: Wadim Egorov - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct rk808_reg_data { - int addr; - int mask; - int value; -}; - -static bool rk808_is_volatile_reg(struct device *dev, unsigned int reg) -{ - /* - * Notes: - * - Technically the ROUND_30s bit makes RTC_CTRL_REG volatile, but - * we don't use that feature. It's better to cache. - * - It's unlikely we care that RK808_DEVCTRL_REG is volatile since - * bits are cleared in case when we shutoff anyway, but better safe. - */ - - switch (reg) { - case RK808_SECONDS_REG ... RK808_WEEKS_REG: - case RK808_RTC_STATUS_REG: - case RK808_VB_MON_REG: - case RK808_THERMAL_REG: - case RK808_DCDC_UV_STS_REG: - case RK808_LDO_UV_STS_REG: - case RK808_DCDC_PG_REG: - case RK808_LDO_PG_REG: - case RK808_DEVCTRL_REG: - case RK808_INT_STS_REG1: - case RK808_INT_STS_REG2: - return true; - } - - return false; -} - -static bool rk817_is_volatile_reg(struct device *dev, unsigned int reg) -{ - /* - * Notes: - * - Technically the ROUND_30s bit makes RTC_CTRL_REG volatile, but - * we don't use that feature. It's better to cache. - */ - - switch (reg) { - case RK817_SECONDS_REG ... RK817_WEEKS_REG: - case RK817_RTC_STATUS_REG: - case RK817_CODEC_DTOP_LPT_SRST: - case RK817_GAS_GAUGE_ADC_CONFIG0 ... 
RK817_GAS_GAUGE_CUR_ADC_K0: - case RK817_PMIC_CHRG_STS: - case RK817_PMIC_CHRG_OUT: - case RK817_PMIC_CHRG_IN: - case RK817_INT_STS_REG0: - case RK817_INT_STS_REG1: - case RK817_INT_STS_REG2: - case RK817_SYS_STS: - return true; - } - - return false; -} - -static const struct regmap_config rk818_regmap_config = { - .reg_bits = 8, - .val_bits = 8, - .max_register = RK818_USB_CTRL_REG, - .cache_type = REGCACHE_RBTREE, - .volatile_reg = rk808_is_volatile_reg, -}; - -static const struct regmap_config rk805_regmap_config = { - .reg_bits = 8, - .val_bits = 8, - .max_register = RK805_OFF_SOURCE_REG, - .cache_type = REGCACHE_RBTREE, - .volatile_reg = rk808_is_volatile_reg, -}; - -static const struct regmap_config rk808_regmap_config = { - .reg_bits = 8, - .val_bits = 8, - .max_register = RK808_IO_POL_REG, - .cache_type = REGCACHE_RBTREE, - .volatile_reg = rk808_is_volatile_reg, -}; - -static const struct regmap_config rk817_regmap_config = { - .reg_bits = 8, - .val_bits = 8, - .max_register = RK817_GPIO_INT_CFG, - .cache_type = REGCACHE_NONE, - .volatile_reg = rk817_is_volatile_reg, -}; - -static const struct resource rtc_resources[] = { - DEFINE_RES_IRQ(RK808_IRQ_RTC_ALARM), -}; - -static const struct resource rk817_rtc_resources[] = { - DEFINE_RES_IRQ(RK817_IRQ_RTC_ALARM), -}; - -static const struct resource rk805_key_resources[] = { - DEFINE_RES_IRQ(RK805_IRQ_PWRON_RISE), - DEFINE_RES_IRQ(RK805_IRQ_PWRON_FALL), -}; - -static const struct resource rk817_pwrkey_resources[] = { - DEFINE_RES_IRQ(RK817_IRQ_PWRON_RISE), - DEFINE_RES_IRQ(RK817_IRQ_PWRON_FALL), -}; - -static const struct resource rk817_charger_resources[] = { - DEFINE_RES_IRQ(RK817_IRQ_PLUG_IN), - DEFINE_RES_IRQ(RK817_IRQ_PLUG_OUT), -}; - -static const struct mfd_cell rk805s[] = { - { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, - { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, - { .name = "rk805-pinctrl", .id = PLATFORM_DEVID_NONE, }, - { - .name = "rk808-rtc", - .num_resources = ARRAY_SIZE(rtc_resources), - .resources = &rtc_resources[0], - .id = PLATFORM_DEVID_NONE, - }, - { .name = "rk805-pwrkey", - .num_resources = ARRAY_SIZE(rk805_key_resources), - .resources = &rk805_key_resources[0], - .id = PLATFORM_DEVID_NONE, - }, -}; - -static const struct mfd_cell rk808s[] = { - { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, - { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, - { - .name = "rk808-rtc", - .num_resources = ARRAY_SIZE(rtc_resources), - .resources = rtc_resources, - .id = PLATFORM_DEVID_NONE, - }, -}; - -static const struct mfd_cell rk817s[] = { - { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, - { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, - { - .name = "rk805-pwrkey", - .num_resources = ARRAY_SIZE(rk817_pwrkey_resources), - .resources = &rk817_pwrkey_resources[0], - .id = PLATFORM_DEVID_NONE, - }, - { - .name = "rk808-rtc", - .num_resources = ARRAY_SIZE(rk817_rtc_resources), - .resources = &rk817_rtc_resources[0], - .id = PLATFORM_DEVID_NONE, - }, - { .name = "rk817-codec", .id = PLATFORM_DEVID_NONE, }, - { - .name = "rk817-charger", - .num_resources = ARRAY_SIZE(rk817_charger_resources), - .resources = &rk817_charger_resources[0], - .id = PLATFORM_DEVID_NONE, - }, -}; - -static const struct mfd_cell rk818s[] = { - { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, - { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, - { - .name = "rk808-rtc", - .num_resources = ARRAY_SIZE(rtc_resources), - .resources = rtc_resources, - .id = PLATFORM_DEVID_NONE, - }, 
-}; - -static const struct rk808_reg_data rk805_pre_init_reg[] = { - {RK805_BUCK1_CONFIG_REG, RK805_BUCK1_2_ILMAX_MASK, - RK805_BUCK1_2_ILMAX_4000MA}, - {RK805_BUCK2_CONFIG_REG, RK805_BUCK1_2_ILMAX_MASK, - RK805_BUCK1_2_ILMAX_4000MA}, - {RK805_BUCK3_CONFIG_REG, RK805_BUCK3_4_ILMAX_MASK, - RK805_BUCK3_ILMAX_3000MA}, - {RK805_BUCK4_CONFIG_REG, RK805_BUCK3_4_ILMAX_MASK, - RK805_BUCK4_ILMAX_3500MA}, - {RK805_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_400MA}, - {RK805_THERMAL_REG, TEMP_HOTDIE_MSK, TEMP115C}, -}; - -static const struct rk808_reg_data rk808_pre_init_reg[] = { - { RK808_BUCK3_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_150MA }, - { RK808_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_200MA }, - { RK808_BOOST_CONFIG_REG, BOOST_ILMIN_MASK, BOOST_ILMIN_100MA }, - { RK808_BUCK1_CONFIG_REG, BUCK1_RATE_MASK, BUCK_ILMIN_200MA }, - { RK808_BUCK2_CONFIG_REG, BUCK2_RATE_MASK, BUCK_ILMIN_200MA }, - { RK808_DCDC_UV_ACT_REG, BUCK_UV_ACT_MASK, BUCK_UV_ACT_DISABLE}, - { RK808_VB_MON_REG, MASK_ALL, VB_LO_ACT | - VB_LO_SEL_3500MV }, -}; - -static const struct rk808_reg_data rk817_pre_init_reg[] = { - {RK817_RTC_CTRL_REG, RTC_STOP, RTC_STOP}, - /* Codec specific registers */ - { RK817_CODEC_DTOP_VUCTL, MASK_ALL, 0x03 }, - { RK817_CODEC_DTOP_VUCTIME, MASK_ALL, 0x00 }, - { RK817_CODEC_DTOP_LPT_SRST, MASK_ALL, 0x00 }, - { RK817_CODEC_DTOP_DIGEN_CLKE, MASK_ALL, 0x00 }, - /* from vendor driver, CODEC_AREF_RTCFG0 not defined in data sheet */ - { RK817_CODEC_AREF_RTCFG0, MASK_ALL, 0x00 }, - { RK817_CODEC_AREF_RTCFG1, MASK_ALL, 0x06 }, - { RK817_CODEC_AADC_CFG0, MASK_ALL, 0xc8 }, - /* from vendor driver, CODEC_AADC_CFG1 not defined in data sheet */ - { RK817_CODEC_AADC_CFG1, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_VOLL, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_VOLR, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_SR_ACL0, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_ALC1, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_ALC2, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_NG, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_HPF, MASK_ALL, 0x00 }, - { RK817_CODEC_DADC_RVOLL, MASK_ALL, 0xff }, - { RK817_CODEC_DADC_RVOLR, MASK_ALL, 0xff }, - { RK817_CODEC_AMIC_CFG0, MASK_ALL, 0x70 }, - { RK817_CODEC_AMIC_CFG1, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_PGA_GAIN, MASK_ALL, 0x66 }, - { RK817_CODEC_DMIC_LMT1, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_LMT2, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_NG1, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_NG2, MASK_ALL, 0x00 }, - /* from vendor driver, CODEC_ADAC_CFG0 not defined in data sheet */ - { RK817_CODEC_ADAC_CFG0, MASK_ALL, 0x00 }, - { RK817_CODEC_ADAC_CFG1, MASK_ALL, 0x07 }, - { RK817_CODEC_DDAC_POPD_DACST, MASK_ALL, 0x82 }, - { RK817_CODEC_DDAC_VOLL, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_VOLR, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_SR_LMT0, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_LMT1, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_LMT2, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_MUTE_MIXCTL, MASK_ALL, 0xa0 }, - { RK817_CODEC_DDAC_RVOLL, MASK_ALL, 0xff }, - { RK817_CODEC_DADC_RVOLR, MASK_ALL, 0xff }, - { RK817_CODEC_AMIC_CFG0, MASK_ALL, 0x70 }, - { RK817_CODEC_AMIC_CFG1, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_PGA_GAIN, MASK_ALL, 0x66 }, - { RK817_CODEC_DMIC_LMT1, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_LMT2, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_NG1, MASK_ALL, 0x00 }, - { RK817_CODEC_DMIC_NG2, MASK_ALL, 0x00 }, - /* from vendor driver, CODEC_ADAC_CFG0 not defined in data sheet */ - { RK817_CODEC_ADAC_CFG0, MASK_ALL, 0x00 }, - { RK817_CODEC_ADAC_CFG1, MASK_ALL, 0x07 }, - { RK817_CODEC_DDAC_POPD_DACST, MASK_ALL, 0x82 }, - { 
RK817_CODEC_DDAC_VOLL, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_VOLR, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_SR_LMT0, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_LMT1, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_LMT2, MASK_ALL, 0x00 }, - { RK817_CODEC_DDAC_MUTE_MIXCTL, MASK_ALL, 0xa0 }, - { RK817_CODEC_DDAC_RVOLL, MASK_ALL, 0xff }, - { RK817_CODEC_DDAC_RVOLR, MASK_ALL, 0xff }, - { RK817_CODEC_AHP_ANTI0, MASK_ALL, 0x00 }, - { RK817_CODEC_AHP_ANTI1, MASK_ALL, 0x00 }, - { RK817_CODEC_AHP_CFG0, MASK_ALL, 0xe0 }, - { RK817_CODEC_AHP_CFG1, MASK_ALL, 0x1f }, - { RK817_CODEC_AHP_CP, MASK_ALL, 0x09 }, - { RK817_CODEC_ACLASSD_CFG1, MASK_ALL, 0x69 }, - { RK817_CODEC_ACLASSD_CFG2, MASK_ALL, 0x44 }, - { RK817_CODEC_APLL_CFG0, MASK_ALL, 0x04 }, - { RK817_CODEC_APLL_CFG1, MASK_ALL, 0x00 }, - { RK817_CODEC_APLL_CFG2, MASK_ALL, 0x30 }, - { RK817_CODEC_APLL_CFG3, MASK_ALL, 0x19 }, - { RK817_CODEC_APLL_CFG4, MASK_ALL, 0x65 }, - { RK817_CODEC_APLL_CFG5, MASK_ALL, 0x01 }, - { RK817_CODEC_DI2S_CKM, MASK_ALL, 0x01 }, - { RK817_CODEC_DI2S_RSD, MASK_ALL, 0x00 }, - { RK817_CODEC_DI2S_RXCR1, MASK_ALL, 0x00 }, - { RK817_CODEC_DI2S_RXCR2, MASK_ALL, 0x17 }, - { RK817_CODEC_DI2S_RXCMD_TSD, MASK_ALL, 0x00 }, - { RK817_CODEC_DI2S_TXCR1, MASK_ALL, 0x00 }, - { RK817_CODEC_DI2S_TXCR2, MASK_ALL, 0x17 }, - { RK817_CODEC_DI2S_TXCR3_TXCMD, MASK_ALL, 0x00 }, - {RK817_GPIO_INT_CFG, RK817_INT_POL_MSK, RK817_INT_POL_L}, - {RK817_SYS_CFG(1), RK817_HOTDIE_TEMP_MSK | RK817_TSD_TEMP_MSK, - RK817_HOTDIE_105 | RK817_TSD_140}, -}; - -static const struct rk808_reg_data rk818_pre_init_reg[] = { - /* improve efficiency */ - { RK818_BUCK2_CONFIG_REG, BUCK2_RATE_MASK, BUCK_ILMIN_250MA }, - { RK818_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_250MA }, - { RK818_BOOST_CONFIG_REG, BOOST_ILMIN_MASK, BOOST_ILMIN_100MA }, - { RK818_USB_CTRL_REG, RK818_USB_ILIM_SEL_MASK, - RK818_USB_ILMIN_2000MA }, - /* close charger when usb lower then 3.4V */ - { RK818_USB_CTRL_REG, RK818_USB_CHG_SD_VSEL_MASK, - (0x7 << 4) }, - /* no action when vref */ - { RK818_H5V_EN_REG, BIT(1), RK818_REF_RDY_CTRL }, - /* enable HDMI 5V */ - { RK818_H5V_EN_REG, BIT(0), RK818_H5V_EN }, - { RK808_VB_MON_REG, MASK_ALL, VB_LO_ACT | - VB_LO_SEL_3500MV }, -}; - -static const struct regmap_irq rk805_irqs[] = { - [RK805_IRQ_PWRON_RISE] = { - .mask = RK805_IRQ_PWRON_RISE_MSK, - .reg_offset = 0, - }, - [RK805_IRQ_VB_LOW] = { - .mask = RK805_IRQ_VB_LOW_MSK, - .reg_offset = 0, - }, - [RK805_IRQ_PWRON] = { - .mask = RK805_IRQ_PWRON_MSK, - .reg_offset = 0, - }, - [RK805_IRQ_PWRON_LP] = { - .mask = RK805_IRQ_PWRON_LP_MSK, - .reg_offset = 0, - }, - [RK805_IRQ_HOTDIE] = { - .mask = RK805_IRQ_HOTDIE_MSK, - .reg_offset = 0, - }, - [RK805_IRQ_RTC_ALARM] = { - .mask = RK805_IRQ_RTC_ALARM_MSK, - .reg_offset = 0, - }, - [RK805_IRQ_RTC_PERIOD] = { - .mask = RK805_IRQ_RTC_PERIOD_MSK, - .reg_offset = 0, - }, - [RK805_IRQ_PWRON_FALL] = { - .mask = RK805_IRQ_PWRON_FALL_MSK, - .reg_offset = 0, - }, -}; - -static const struct regmap_irq rk808_irqs[] = { - /* INT_STS */ - [RK808_IRQ_VOUT_LO] = { - .mask = RK808_IRQ_VOUT_LO_MSK, - .reg_offset = 0, - }, - [RK808_IRQ_VB_LO] = { - .mask = RK808_IRQ_VB_LO_MSK, - .reg_offset = 0, - }, - [RK808_IRQ_PWRON] = { - .mask = RK808_IRQ_PWRON_MSK, - .reg_offset = 0, - }, - [RK808_IRQ_PWRON_LP] = { - .mask = RK808_IRQ_PWRON_LP_MSK, - .reg_offset = 0, - }, - [RK808_IRQ_HOTDIE] = { - .mask = RK808_IRQ_HOTDIE_MSK, - .reg_offset = 0, - }, - [RK808_IRQ_RTC_ALARM] = { - .mask = RK808_IRQ_RTC_ALARM_MSK, - .reg_offset = 0, - }, - [RK808_IRQ_RTC_PERIOD] = { - .mask = RK808_IRQ_RTC_PERIOD_MSK, - 
.reg_offset = 0, - }, - - /* INT_STS2 */ - [RK808_IRQ_PLUG_IN_INT] = { - .mask = RK808_IRQ_PLUG_IN_INT_MSK, - .reg_offset = 1, - }, - [RK808_IRQ_PLUG_OUT_INT] = { - .mask = RK808_IRQ_PLUG_OUT_INT_MSK, - .reg_offset = 1, - }, -}; - -static const struct regmap_irq rk818_irqs[] = { - /* INT_STS */ - [RK818_IRQ_VOUT_LO] = { - .mask = RK818_IRQ_VOUT_LO_MSK, - .reg_offset = 0, - }, - [RK818_IRQ_VB_LO] = { - .mask = RK818_IRQ_VB_LO_MSK, - .reg_offset = 0, - }, - [RK818_IRQ_PWRON] = { - .mask = RK818_IRQ_PWRON_MSK, - .reg_offset = 0, - }, - [RK818_IRQ_PWRON_LP] = { - .mask = RK818_IRQ_PWRON_LP_MSK, - .reg_offset = 0, - }, - [RK818_IRQ_HOTDIE] = { - .mask = RK818_IRQ_HOTDIE_MSK, - .reg_offset = 0, - }, - [RK818_IRQ_RTC_ALARM] = { - .mask = RK818_IRQ_RTC_ALARM_MSK, - .reg_offset = 0, - }, - [RK818_IRQ_RTC_PERIOD] = { - .mask = RK818_IRQ_RTC_PERIOD_MSK, - .reg_offset = 0, - }, - [RK818_IRQ_USB_OV] = { - .mask = RK818_IRQ_USB_OV_MSK, - .reg_offset = 0, - }, - - /* INT_STS2 */ - [RK818_IRQ_PLUG_IN] = { - .mask = RK818_IRQ_PLUG_IN_MSK, - .reg_offset = 1, - }, - [RK818_IRQ_PLUG_OUT] = { - .mask = RK818_IRQ_PLUG_OUT_MSK, - .reg_offset = 1, - }, - [RK818_IRQ_CHG_OK] = { - .mask = RK818_IRQ_CHG_OK_MSK, - .reg_offset = 1, - }, - [RK818_IRQ_CHG_TE] = { - .mask = RK818_IRQ_CHG_TE_MSK, - .reg_offset = 1, - }, - [RK818_IRQ_CHG_TS1] = { - .mask = RK818_IRQ_CHG_TS1_MSK, - .reg_offset = 1, - }, - [RK818_IRQ_TS2] = { - .mask = RK818_IRQ_TS2_MSK, - .reg_offset = 1, - }, - [RK818_IRQ_CHG_CVTLIM] = { - .mask = RK818_IRQ_CHG_CVTLIM_MSK, - .reg_offset = 1, - }, - [RK818_IRQ_DISCHG_ILIM] = { - .mask = RK818_IRQ_DISCHG_ILIM_MSK, - .reg_offset = 1, - }, -}; - -static const struct regmap_irq rk817_irqs[RK817_IRQ_END] = { - REGMAP_IRQ_REG_LINE(0, 8), - REGMAP_IRQ_REG_LINE(1, 8), - REGMAP_IRQ_REG_LINE(2, 8), - REGMAP_IRQ_REG_LINE(3, 8), - REGMAP_IRQ_REG_LINE(4, 8), - REGMAP_IRQ_REG_LINE(5, 8), - REGMAP_IRQ_REG_LINE(6, 8), - REGMAP_IRQ_REG_LINE(7, 8), - REGMAP_IRQ_REG_LINE(8, 8), - REGMAP_IRQ_REG_LINE(9, 8), - REGMAP_IRQ_REG_LINE(10, 8), - REGMAP_IRQ_REG_LINE(11, 8), - REGMAP_IRQ_REG_LINE(12, 8), - REGMAP_IRQ_REG_LINE(13, 8), - REGMAP_IRQ_REG_LINE(14, 8), - REGMAP_IRQ_REG_LINE(15, 8), - REGMAP_IRQ_REG_LINE(16, 8), - REGMAP_IRQ_REG_LINE(17, 8), - REGMAP_IRQ_REG_LINE(18, 8), - REGMAP_IRQ_REG_LINE(19, 8), - REGMAP_IRQ_REG_LINE(20, 8), - REGMAP_IRQ_REG_LINE(21, 8), - REGMAP_IRQ_REG_LINE(22, 8), - REGMAP_IRQ_REG_LINE(23, 8) -}; - -static struct regmap_irq_chip rk805_irq_chip = { - .name = "rk805", - .irqs = rk805_irqs, - .num_irqs = ARRAY_SIZE(rk805_irqs), - .num_regs = 1, - .status_base = RK805_INT_STS_REG, - .mask_base = RK805_INT_STS_MSK_REG, - .ack_base = RK805_INT_STS_REG, - .init_ack_masked = true, -}; - -static const struct regmap_irq_chip rk808_irq_chip = { - .name = "rk808", - .irqs = rk808_irqs, - .num_irqs = ARRAY_SIZE(rk808_irqs), - .num_regs = 2, - .irq_reg_stride = 2, - .status_base = RK808_INT_STS_REG1, - .mask_base = RK808_INT_STS_MSK_REG1, - .ack_base = RK808_INT_STS_REG1, - .init_ack_masked = true, -}; - -static struct regmap_irq_chip rk817_irq_chip = { - .name = "rk817", - .irqs = rk817_irqs, - .num_irqs = ARRAY_SIZE(rk817_irqs), - .num_regs = 3, - .irq_reg_stride = 2, - .status_base = RK817_INT_STS_REG0, - .mask_base = RK817_INT_STS_MSK_REG0, - .ack_base = RK817_INT_STS_REG0, - .init_ack_masked = true, -}; - -static const struct regmap_irq_chip rk818_irq_chip = { - .name = "rk818", - .irqs = rk818_irqs, - .num_irqs = ARRAY_SIZE(rk818_irqs), - .num_regs = 2, - .irq_reg_stride = 2, - .status_base = 
RK818_INT_STS_REG1, - .mask_base = RK818_INT_STS_MSK_REG1, - .ack_base = RK818_INT_STS_REG1, - .init_ack_masked = true, -}; - -static int rk808_power_off(struct sys_off_data *data) -{ - struct rk808 *rk808 = data->cb_data; - int ret; - unsigned int reg, bit; - - switch (rk808->variant) { - case RK805_ID: - reg = RK805_DEV_CTRL_REG; - bit = DEV_OFF; - break; - case RK808_ID: - reg = RK808_DEVCTRL_REG, - bit = DEV_OFF_RST; - break; - case RK809_ID: - case RK817_ID: - reg = RK817_SYS_CFG(3); - bit = DEV_OFF; - break; - case RK818_ID: - reg = RK818_DEVCTRL_REG; - bit = DEV_OFF; - break; - default: - return NOTIFY_DONE; - } - ret = regmap_update_bits(rk808->regmap, reg, bit, bit); - if (ret) - dev_err(rk808->dev, "Failed to shutdown device!\n"); - - return NOTIFY_DONE; -} - -static int rk808_restart(struct sys_off_data *data) -{ - struct rk808 *rk808 = data->cb_data; - unsigned int reg, bit; - int ret; - - switch (rk808->variant) { - case RK809_ID: - case RK817_ID: - reg = RK817_SYS_CFG(3); - bit = DEV_RST; - break; - - default: - return NOTIFY_DONE; - } - ret = regmap_update_bits(rk808->regmap, reg, bit, bit); - if (ret) - dev_err(rk808->dev, "Failed to restart device!\n"); - - return NOTIFY_DONE; -} - -static void rk8xx_shutdown(struct i2c_client *client) -{ - struct rk808 *rk808 = i2c_get_clientdata(client); - int ret; - - switch (rk808->variant) { - case RK805_ID: - ret = regmap_update_bits(rk808->regmap, - RK805_GPIO_IO_POL_REG, - SLP_SD_MSK, - SHUTDOWN_FUN); - break; - case RK809_ID: - case RK817_ID: - ret = regmap_update_bits(rk808->regmap, - RK817_SYS_CFG(3), - RK817_SLPPIN_FUNC_MSK, - SLPPIN_DN_FUN); - break; - default: - return; - } - if (ret) - dev_warn(&client->dev, - "Cannot switch to power down function\n"); -} - -static const struct of_device_id rk808_of_match[] = { - { .compatible = "rockchip,rk805" }, - { .compatible = "rockchip,rk808" }, - { .compatible = "rockchip,rk809" }, - { .compatible = "rockchip,rk817" }, - { .compatible = "rockchip,rk818" }, - { }, -}; -MODULE_DEVICE_TABLE(of, rk808_of_match); - -static int rk808_probe(struct i2c_client *client) -{ - struct device_node *np = client->dev.of_node; - struct rk808 *rk808; - const struct rk808_reg_data *pre_init_reg; - const struct mfd_cell *cells; - int nr_pre_init_regs; - int nr_cells; - int msb, lsb; - unsigned char pmic_id_msb, pmic_id_lsb; - int ret; - int i; - - rk808 = devm_kzalloc(&client->dev, sizeof(*rk808), GFP_KERNEL); - if (!rk808) - return -ENOMEM; - - if (of_device_is_compatible(np, "rockchip,rk817") || - of_device_is_compatible(np, "rockchip,rk809")) { - pmic_id_msb = RK817_ID_MSB; - pmic_id_lsb = RK817_ID_LSB; - } else { - pmic_id_msb = RK808_ID_MSB; - pmic_id_lsb = RK808_ID_LSB; - } - - /* Read chip variant */ - msb = i2c_smbus_read_byte_data(client, pmic_id_msb); - if (msb < 0) - return dev_err_probe(&client->dev, msb, "failed to read the chip id MSB\n"); - - lsb = i2c_smbus_read_byte_data(client, pmic_id_lsb); - if (lsb < 0) - return dev_err_probe(&client->dev, lsb, "failed to read the chip id LSB\n"); - - rk808->variant = ((msb << 8) | lsb) & RK8XX_ID_MSK; - dev_info(&client->dev, "chip id: 0x%x\n", (unsigned int)rk808->variant); - - switch (rk808->variant) { - case RK805_ID: - rk808->regmap_cfg = &rk805_regmap_config; - rk808->regmap_irq_chip = &rk805_irq_chip; - pre_init_reg = rk805_pre_init_reg; - nr_pre_init_regs = ARRAY_SIZE(rk805_pre_init_reg); - cells = rk805s; - nr_cells = ARRAY_SIZE(rk805s); - break; - case RK808_ID: - rk808->regmap_cfg = &rk808_regmap_config; - rk808->regmap_irq_chip = 
&rk808_irq_chip; - pre_init_reg = rk808_pre_init_reg; - nr_pre_init_regs = ARRAY_SIZE(rk808_pre_init_reg); - cells = rk808s; - nr_cells = ARRAY_SIZE(rk808s); - break; - case RK818_ID: - rk808->regmap_cfg = &rk818_regmap_config; - rk808->regmap_irq_chip = &rk818_irq_chip; - pre_init_reg = rk818_pre_init_reg; - nr_pre_init_regs = ARRAY_SIZE(rk818_pre_init_reg); - cells = rk818s; - nr_cells = ARRAY_SIZE(rk818s); - break; - case RK809_ID: - case RK817_ID: - rk808->regmap_cfg = &rk817_regmap_config; - rk808->regmap_irq_chip = &rk817_irq_chip; - pre_init_reg = rk817_pre_init_reg; - nr_pre_init_regs = ARRAY_SIZE(rk817_pre_init_reg); - cells = rk817s; - nr_cells = ARRAY_SIZE(rk817s); - break; - default: - dev_err(&client->dev, "Unsupported RK8XX ID %lu\n", - rk808->variant); - return -EINVAL; - } - - rk808->dev = &client->dev; - i2c_set_clientdata(client, rk808); - - rk808->regmap = devm_regmap_init_i2c(client, rk808->regmap_cfg); - if (IS_ERR(rk808->regmap)) - return dev_err_probe(&client->dev, PTR_ERR(rk808->regmap), - "regmap initialization failed\n"); - - if (!client->irq) - return dev_err_probe(&client->dev, -EINVAL, "No interrupt support, no core IRQ\n"); - - ret = devm_regmap_add_irq_chip(&client->dev, rk808->regmap, client->irq, - IRQF_ONESHOT, -1, - rk808->regmap_irq_chip, &rk808->irq_data); - if (ret) - return dev_err_probe(&client->dev, ret, "Failed to add irq_chip\n"); - - for (i = 0; i < nr_pre_init_regs; i++) { - ret = regmap_update_bits(rk808->regmap, - pre_init_reg[i].addr, - pre_init_reg[i].mask, - pre_init_reg[i].value); - if (ret) - return dev_err_probe(&client->dev, ret, "0x%x write err\n", - pre_init_reg[i].addr); - } - - ret = devm_mfd_add_devices(&client->dev, PLATFORM_DEVID_NONE, - cells, nr_cells, NULL, 0, - regmap_irq_get_domain(rk808->irq_data)); - if (ret) - return dev_err_probe(&client->dev, ret, "failed to add MFD devices\n"); - - if (of_property_read_bool(np, "rockchip,system-power-controller")) { - ret = devm_register_sys_off_handler(&client->dev, - SYS_OFF_MODE_POWER_OFF_PREPARE, SYS_OFF_PRIO_HIGH, - &rk808_power_off, rk808); - if (ret) - return dev_err_probe(&client->dev, ret, - "failed to register poweroff handler\n"); - - switch (rk808->variant) { - case RK809_ID: - case RK817_ID: - ret = devm_register_sys_off_handler(&client->dev, - SYS_OFF_MODE_RESTART, SYS_OFF_PRIO_HIGH, - &rk808_restart, rk808); - if (ret) - dev_warn(&client->dev, "failed to register rst handler, %d\n", ret); - break; - default: - dev_dbg(&client->dev, "pmic controlled board reset not supported\n"); - break; - } - } - - return 0; -} - -static int __maybe_unused rk8xx_suspend(struct device *dev) -{ - struct rk808 *rk808 = i2c_get_clientdata(to_i2c_client(dev)); - int ret = 0; - - switch (rk808->variant) { - case RK805_ID: - ret = regmap_update_bits(rk808->regmap, - RK805_GPIO_IO_POL_REG, - SLP_SD_MSK, - SLEEP_FUN); - break; - case RK809_ID: - case RK817_ID: - ret = regmap_update_bits(rk808->regmap, - RK817_SYS_CFG(3), - RK817_SLPPIN_FUNC_MSK, - SLPPIN_SLP_FUN); - break; - default: - break; - } - - return ret; -} - -static int __maybe_unused rk8xx_resume(struct device *dev) -{ - struct rk808 *rk808 = i2c_get_clientdata(to_i2c_client(dev)); - int ret = 0; - - switch (rk808->variant) { - case RK809_ID: - case RK817_ID: - ret = regmap_update_bits(rk808->regmap, - RK817_SYS_CFG(3), - RK817_SLPPIN_FUNC_MSK, - SLPPIN_NULL_FUN); - break; - default: - break; - } - - return ret; -} -static SIMPLE_DEV_PM_OPS(rk8xx_pm_ops, rk8xx_suspend, rk8xx_resume); - -static struct i2c_driver rk808_i2c_driver = { - 
.driver = { - .name = "rk808", - .of_match_table = rk808_of_match, - .pm = &rk8xx_pm_ops, - }, - .probe_new = rk808_probe, - .shutdown = rk8xx_shutdown, -}; - -module_i2c_driver(rk808_i2c_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Chris Zhong "); -MODULE_AUTHOR("Zhang Qing "); -MODULE_AUTHOR("Wadim Egorov "); -MODULE_DESCRIPTION("RK808/RK818 PMIC driver"); diff --git a/drivers/mfd/rk8xx-core.c b/drivers/mfd/rk8xx-core.c new file mode 100644 index 000000000000..5c0a5acef34c --- /dev/null +++ b/drivers/mfd/rk8xx-core.c @@ -0,0 +1,706 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * MFD core driver for Rockchip RK8XX + * + * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd + * Copyright (C) 2016 PHYTEC Messtechnik GmbH + * + * Author: Chris Zhong + * Author: Zhang Qing + * Author: Wadim Egorov + */ + +#include +#include +#include +#include +#include +#include +#include + +struct rk808_reg_data { + int addr; + int mask; + int value; +}; + +static const struct resource rtc_resources[] = { + DEFINE_RES_IRQ(RK808_IRQ_RTC_ALARM), +}; + +static const struct resource rk817_rtc_resources[] = { + DEFINE_RES_IRQ(RK817_IRQ_RTC_ALARM), +}; + +static const struct resource rk805_key_resources[] = { + DEFINE_RES_IRQ(RK805_IRQ_PWRON_RISE), + DEFINE_RES_IRQ(RK805_IRQ_PWRON_FALL), +}; + +static const struct resource rk817_pwrkey_resources[] = { + DEFINE_RES_IRQ(RK817_IRQ_PWRON_RISE), + DEFINE_RES_IRQ(RK817_IRQ_PWRON_FALL), +}; + +static const struct resource rk817_charger_resources[] = { + DEFINE_RES_IRQ(RK817_IRQ_PLUG_IN), + DEFINE_RES_IRQ(RK817_IRQ_PLUG_OUT), +}; + +static const struct mfd_cell rk805s[] = { + { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, + { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, + { .name = "rk805-pinctrl", .id = PLATFORM_DEVID_NONE, }, + { + .name = "rk808-rtc", + .num_resources = ARRAY_SIZE(rtc_resources), + .resources = &rtc_resources[0], + .id = PLATFORM_DEVID_NONE, + }, + { .name = "rk805-pwrkey", + .num_resources = ARRAY_SIZE(rk805_key_resources), + .resources = &rk805_key_resources[0], + .id = PLATFORM_DEVID_NONE, + }, +}; + +static const struct mfd_cell rk808s[] = { + { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, + { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, + { + .name = "rk808-rtc", + .num_resources = ARRAY_SIZE(rtc_resources), + .resources = rtc_resources, + .id = PLATFORM_DEVID_NONE, + }, +}; + +static const struct mfd_cell rk817s[] = { + { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, + { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, + { + .name = "rk805-pwrkey", + .num_resources = ARRAY_SIZE(rk817_pwrkey_resources), + .resources = &rk817_pwrkey_resources[0], + .id = PLATFORM_DEVID_NONE, + }, + { + .name = "rk808-rtc", + .num_resources = ARRAY_SIZE(rk817_rtc_resources), + .resources = &rk817_rtc_resources[0], + .id = PLATFORM_DEVID_NONE, + }, + { .name = "rk817-codec", .id = PLATFORM_DEVID_NONE, }, + { + .name = "rk817-charger", + .num_resources = ARRAY_SIZE(rk817_charger_resources), + .resources = &rk817_charger_resources[0], + .id = PLATFORM_DEVID_NONE, + }, +}; + +static const struct mfd_cell rk818s[] = { + { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, + { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, + { + .name = "rk808-rtc", + .num_resources = ARRAY_SIZE(rtc_resources), + .resources = rtc_resources, + .id = PLATFORM_DEVID_NONE, + }, +}; + +static const struct rk808_reg_data rk805_pre_init_reg[] = { + {RK805_BUCK1_CONFIG_REG, RK805_BUCK1_2_ILMAX_MASK, + 
RK805_BUCK1_2_ILMAX_4000MA}, + {RK805_BUCK2_CONFIG_REG, RK805_BUCK1_2_ILMAX_MASK, + RK805_BUCK1_2_ILMAX_4000MA}, + {RK805_BUCK3_CONFIG_REG, RK805_BUCK3_4_ILMAX_MASK, + RK805_BUCK3_ILMAX_3000MA}, + {RK805_BUCK4_CONFIG_REG, RK805_BUCK3_4_ILMAX_MASK, + RK805_BUCK4_ILMAX_3500MA}, + {RK805_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_400MA}, + {RK805_THERMAL_REG, TEMP_HOTDIE_MSK, TEMP115C}, +}; + +static const struct rk808_reg_data rk808_pre_init_reg[] = { + { RK808_BUCK3_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_150MA }, + { RK808_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_200MA }, + { RK808_BOOST_CONFIG_REG, BOOST_ILMIN_MASK, BOOST_ILMIN_100MA }, + { RK808_BUCK1_CONFIG_REG, BUCK1_RATE_MASK, BUCK_ILMIN_200MA }, + { RK808_BUCK2_CONFIG_REG, BUCK2_RATE_MASK, BUCK_ILMIN_200MA }, + { RK808_DCDC_UV_ACT_REG, BUCK_UV_ACT_MASK, BUCK_UV_ACT_DISABLE}, + { RK808_VB_MON_REG, MASK_ALL, VB_LO_ACT | + VB_LO_SEL_3500MV }, +}; + +static const struct rk808_reg_data rk817_pre_init_reg[] = { + {RK817_RTC_CTRL_REG, RTC_STOP, RTC_STOP}, + /* Codec specific registers */ + { RK817_CODEC_DTOP_VUCTL, MASK_ALL, 0x03 }, + { RK817_CODEC_DTOP_VUCTIME, MASK_ALL, 0x00 }, + { RK817_CODEC_DTOP_LPT_SRST, MASK_ALL, 0x00 }, + { RK817_CODEC_DTOP_DIGEN_CLKE, MASK_ALL, 0x00 }, + /* from vendor driver, CODEC_AREF_RTCFG0 not defined in data sheet */ + { RK817_CODEC_AREF_RTCFG0, MASK_ALL, 0x00 }, + { RK817_CODEC_AREF_RTCFG1, MASK_ALL, 0x06 }, + { RK817_CODEC_AADC_CFG0, MASK_ALL, 0xc8 }, + /* from vendor driver, CODEC_AADC_CFG1 not defined in data sheet */ + { RK817_CODEC_AADC_CFG1, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_VOLL, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_VOLR, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_SR_ACL0, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_ALC1, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_ALC2, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_NG, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_HPF, MASK_ALL, 0x00 }, + { RK817_CODEC_DADC_RVOLL, MASK_ALL, 0xff }, + { RK817_CODEC_DADC_RVOLR, MASK_ALL, 0xff }, + { RK817_CODEC_AMIC_CFG0, MASK_ALL, 0x70 }, + { RK817_CODEC_AMIC_CFG1, MASK_ALL, 0x00 }, + { RK817_CODEC_DMIC_PGA_GAIN, MASK_ALL, 0x66 }, + { RK817_CODEC_DMIC_LMT1, MASK_ALL, 0x00 }, + { RK817_CODEC_DMIC_LMT2, MASK_ALL, 0x00 }, + { RK817_CODEC_DMIC_NG1, MASK_ALL, 0x00 }, + { RK817_CODEC_DMIC_NG2, MASK_ALL, 0x00 }, + /* from vendor driver, CODEC_ADAC_CFG0 not defined in data sheet */ + { RK817_CODEC_ADAC_CFG0, MASK_ALL, 0x00 }, + { RK817_CODEC_ADAC_CFG1, MASK_ALL, 0x07 }, + { RK817_CODEC_DDAC_POPD_DACST, MASK_ALL, 0x82 }, + { RK817_CODEC_DDAC_VOLL, MASK_ALL, 0x00 }, + { RK817_CODEC_DDAC_VOLR, MASK_ALL, 0x00 }, + { RK817_CODEC_DDAC_SR_LMT0, MASK_ALL, 0x00 },
+ { RK817_CODEC_DDAC_LMT1, MASK_ALL, 0x00 }, + { RK817_CODEC_DDAC_LMT2, MASK_ALL, 0x00 }, + { RK817_CODEC_DDAC_MUTE_MIXCTL, MASK_ALL, 0xa0 }, + { RK817_CODEC_DDAC_RVOLL, MASK_ALL, 0xff }, + { RK817_CODEC_DDAC_RVOLR, MASK_ALL, 0xff }, + { RK817_CODEC_AHP_ANTI0, MASK_ALL, 0x00 }, + { RK817_CODEC_AHP_ANTI1, MASK_ALL, 0x00 }, + { RK817_CODEC_AHP_CFG0, MASK_ALL, 0xe0 }, + { RK817_CODEC_AHP_CFG1, MASK_ALL, 0x1f }, + { RK817_CODEC_AHP_CP, MASK_ALL, 0x09 }, + { RK817_CODEC_ACLASSD_CFG1, MASK_ALL, 0x69 }, + { RK817_CODEC_ACLASSD_CFG2, MASK_ALL, 0x44 }, + { RK817_CODEC_APLL_CFG0, MASK_ALL, 0x04 }, + { RK817_CODEC_APLL_CFG1, MASK_ALL, 0x00 }, + { RK817_CODEC_APLL_CFG2, MASK_ALL, 0x30 }, + { RK817_CODEC_APLL_CFG3, MASK_ALL, 0x19 }, + { RK817_CODEC_APLL_CFG4, MASK_ALL, 0x65 }, + { RK817_CODEC_APLL_CFG5, MASK_ALL, 0x01 }, + { RK817_CODEC_DI2S_CKM, MASK_ALL, 0x01 }, + { RK817_CODEC_DI2S_RSD, MASK_ALL, 0x00 }, + { RK817_CODEC_DI2S_RXCR1, MASK_ALL, 0x00 }, + { RK817_CODEC_DI2S_RXCR2, MASK_ALL, 0x17 }, + { RK817_CODEC_DI2S_RXCMD_TSD, MASK_ALL, 0x00 }, + { RK817_CODEC_DI2S_TXCR1, MASK_ALL, 0x00 }, + { RK817_CODEC_DI2S_TXCR2, MASK_ALL, 0x17 }, + { RK817_CODEC_DI2S_TXCR3_TXCMD, MASK_ALL, 0x00 }, + {RK817_GPIO_INT_CFG, RK817_INT_POL_MSK, RK817_INT_POL_L}, + {RK817_SYS_CFG(1), RK817_HOTDIE_TEMP_MSK | RK817_TSD_TEMP_MSK, + RK817_HOTDIE_105 | RK817_TSD_140}, +}; + +static const struct rk808_reg_data rk818_pre_init_reg[] = { + /* improve efficiency */ + { RK818_BUCK2_CONFIG_REG, BUCK2_RATE_MASK, BUCK_ILMIN_250MA }, + { RK818_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_250MA }, + { RK818_BOOST_CONFIG_REG, BOOST_ILMIN_MASK, BOOST_ILMIN_100MA }, + { RK818_USB_CTRL_REG, RK818_USB_ILIM_SEL_MASK, + RK818_USB_ILMIN_2000MA }, + /* close charger when usb lower then 3.4V */ + { RK818_USB_CTRL_REG, RK818_USB_CHG_SD_VSEL_MASK, + (0x7 << 4) }, + /* no action when vref */ + { RK818_H5V_EN_REG, BIT(1), RK818_REF_RDY_CTRL }, + /* enable HDMI 5V */ + { RK818_H5V_EN_REG, BIT(0), RK818_H5V_EN }, + { RK808_VB_MON_REG, MASK_ALL, VB_LO_ACT | + VB_LO_SEL_3500MV }, +}; + +static const struct regmap_irq rk805_irqs[] = { + [RK805_IRQ_PWRON_RISE] = { + .mask = RK805_IRQ_PWRON_RISE_MSK, + .reg_offset = 0, + }, + [RK805_IRQ_VB_LOW] = { + .mask = RK805_IRQ_VB_LOW_MSK, + .reg_offset = 0, + }, + [RK805_IRQ_PWRON] = { + .mask = RK805_IRQ_PWRON_MSK, + .reg_offset = 0, + }, + [RK805_IRQ_PWRON_LP] = { + .mask = RK805_IRQ_PWRON_LP_MSK, + .reg_offset = 0, + }, + [RK805_IRQ_HOTDIE] = { + .mask = RK805_IRQ_HOTDIE_MSK, + .reg_offset = 0, + }, + [RK805_IRQ_RTC_ALARM] = { + .mask = RK805_IRQ_RTC_ALARM_MSK, + .reg_offset = 0, + }, + [RK805_IRQ_RTC_PERIOD] = { + .mask = RK805_IRQ_RTC_PERIOD_MSK, + .reg_offset = 0, + }, + [RK805_IRQ_PWRON_FALL] = { + .mask = RK805_IRQ_PWRON_FALL_MSK, + .reg_offset = 0, + }, +}; + +static const struct regmap_irq rk808_irqs[] = { + /* INT_STS */ + [RK808_IRQ_VOUT_LO] = { + .mask = RK808_IRQ_VOUT_LO_MSK, + .reg_offset = 0, + }, + [RK808_IRQ_VB_LO] = { + .mask = RK808_IRQ_VB_LO_MSK, + .reg_offset = 0, + }, + [RK808_IRQ_PWRON] = { + .mask = RK808_IRQ_PWRON_MSK, + .reg_offset = 0, + }, + [RK808_IRQ_PWRON_LP] = { + .mask = RK808_IRQ_PWRON_LP_MSK, + .reg_offset = 0, + }, + [RK808_IRQ_HOTDIE] = { + .mask = RK808_IRQ_HOTDIE_MSK, + .reg_offset = 0, + }, + [RK808_IRQ_RTC_ALARM] = { + .mask = RK808_IRQ_RTC_ALARM_MSK, + .reg_offset = 0, + }, + [RK808_IRQ_RTC_PERIOD] = { + .mask = RK808_IRQ_RTC_PERIOD_MSK, + .reg_offset = 0, + }, + + /* INT_STS2 */ + [RK808_IRQ_PLUG_IN_INT] = { + .mask = RK808_IRQ_PLUG_IN_INT_MSK, + .reg_offset = 1, + }, + 
[RK808_IRQ_PLUG_OUT_INT] = { + .mask = RK808_IRQ_PLUG_OUT_INT_MSK, + .reg_offset = 1, + }, +}; + +static const struct regmap_irq rk818_irqs[] = { + /* INT_STS */ + [RK818_IRQ_VOUT_LO] = { + .mask = RK818_IRQ_VOUT_LO_MSK, + .reg_offset = 0, + }, + [RK818_IRQ_VB_LO] = { + .mask = RK818_IRQ_VB_LO_MSK, + .reg_offset = 0, + }, + [RK818_IRQ_PWRON] = { + .mask = RK818_IRQ_PWRON_MSK, + .reg_offset = 0, + }, + [RK818_IRQ_PWRON_LP] = { + .mask = RK818_IRQ_PWRON_LP_MSK, + .reg_offset = 0, + }, + [RK818_IRQ_HOTDIE] = { + .mask = RK818_IRQ_HOTDIE_MSK, + .reg_offset = 0, + }, + [RK818_IRQ_RTC_ALARM] = { + .mask = RK818_IRQ_RTC_ALARM_MSK, + .reg_offset = 0, + }, + [RK818_IRQ_RTC_PERIOD] = { + .mask = RK818_IRQ_RTC_PERIOD_MSK, + .reg_offset = 0, + }, + [RK818_IRQ_USB_OV] = { + .mask = RK818_IRQ_USB_OV_MSK, + .reg_offset = 0, + }, + + /* INT_STS2 */ + [RK818_IRQ_PLUG_IN] = { + .mask = RK818_IRQ_PLUG_IN_MSK, + .reg_offset = 1, + }, + [RK818_IRQ_PLUG_OUT] = { + .mask = RK818_IRQ_PLUG_OUT_MSK, + .reg_offset = 1, + }, + [RK818_IRQ_CHG_OK] = { + .mask = RK818_IRQ_CHG_OK_MSK, + .reg_offset = 1, + }, + [RK818_IRQ_CHG_TE] = { + .mask = RK818_IRQ_CHG_TE_MSK, + .reg_offset = 1, + }, + [RK818_IRQ_CHG_TS1] = { + .mask = RK818_IRQ_CHG_TS1_MSK, + .reg_offset = 1, + }, + [RK818_IRQ_TS2] = { + .mask = RK818_IRQ_TS2_MSK, + .reg_offset = 1, + }, + [RK818_IRQ_CHG_CVTLIM] = { + .mask = RK818_IRQ_CHG_CVTLIM_MSK, + .reg_offset = 1, + }, + [RK818_IRQ_DISCHG_ILIM] = { + .mask = RK818_IRQ_DISCHG_ILIM_MSK, + .reg_offset = 1, + }, +}; + +static const struct regmap_irq rk817_irqs[RK817_IRQ_END] = { + REGMAP_IRQ_REG_LINE(0, 8), + REGMAP_IRQ_REG_LINE(1, 8), + REGMAP_IRQ_REG_LINE(2, 8), + REGMAP_IRQ_REG_LINE(3, 8), + REGMAP_IRQ_REG_LINE(4, 8), + REGMAP_IRQ_REG_LINE(5, 8), + REGMAP_IRQ_REG_LINE(6, 8), + REGMAP_IRQ_REG_LINE(7, 8), + REGMAP_IRQ_REG_LINE(8, 8), + REGMAP_IRQ_REG_LINE(9, 8), + REGMAP_IRQ_REG_LINE(10, 8), + REGMAP_IRQ_REG_LINE(11, 8), + REGMAP_IRQ_REG_LINE(12, 8), + REGMAP_IRQ_REG_LINE(13, 8), + REGMAP_IRQ_REG_LINE(14, 8), + REGMAP_IRQ_REG_LINE(15, 8), + REGMAP_IRQ_REG_LINE(16, 8), + REGMAP_IRQ_REG_LINE(17, 8), + REGMAP_IRQ_REG_LINE(18, 8), + REGMAP_IRQ_REG_LINE(19, 8), + REGMAP_IRQ_REG_LINE(20, 8), + REGMAP_IRQ_REG_LINE(21, 8), + REGMAP_IRQ_REG_LINE(22, 8), + REGMAP_IRQ_REG_LINE(23, 8) +}; + +static struct regmap_irq_chip rk805_irq_chip = { + .name = "rk805", + .irqs = rk805_irqs, + .num_irqs = ARRAY_SIZE(rk805_irqs), + .num_regs = 1, + .status_base = RK805_INT_STS_REG, + .mask_base = RK805_INT_STS_MSK_REG, + .ack_base = RK805_INT_STS_REG, + .init_ack_masked = true, +}; + +static const struct regmap_irq_chip rk808_irq_chip = { + .name = "rk808", + .irqs = rk808_irqs, + .num_irqs = ARRAY_SIZE(rk808_irqs), + .num_regs = 2, + .irq_reg_stride = 2, + .status_base = RK808_INT_STS_REG1, + .mask_base = RK808_INT_STS_MSK_REG1, + .ack_base = RK808_INT_STS_REG1, + .init_ack_masked = true, +}; + +static struct regmap_irq_chip rk817_irq_chip = { + .name = "rk817", + .irqs = rk817_irqs, + .num_irqs = ARRAY_SIZE(rk817_irqs), + .num_regs = 3, + .irq_reg_stride = 2, + .status_base = RK817_INT_STS_REG0, + .mask_base = RK817_INT_STS_MSK_REG0, + .ack_base = RK817_INT_STS_REG0, + .init_ack_masked = true, +}; + +static const struct regmap_irq_chip rk818_irq_chip = { + .name = "rk818", + .irqs = rk818_irqs, + .num_irqs = ARRAY_SIZE(rk818_irqs), + .num_regs = 2, + .irq_reg_stride = 2, + .status_base = RK818_INT_STS_REG1, + .mask_base = RK818_INT_STS_MSK_REG1, + .ack_base = RK818_INT_STS_REG1, + .init_ack_masked = true, +}; + +static int 
rk808_power_off(struct sys_off_data *data) +{ + struct rk808 *rk808 = data->cb_data; + int ret; + unsigned int reg, bit; + + switch (rk808->variant) { + case RK805_ID: + reg = RK805_DEV_CTRL_REG; + bit = DEV_OFF; + break; + case RK808_ID: + reg = RK808_DEVCTRL_REG, + bit = DEV_OFF_RST; + break; + case RK809_ID: + case RK817_ID: + reg = RK817_SYS_CFG(3); + bit = DEV_OFF; + break; + case RK818_ID: + reg = RK818_DEVCTRL_REG; + bit = DEV_OFF; + break; + default: + return NOTIFY_DONE; + } + ret = regmap_update_bits(rk808->regmap, reg, bit, bit); + if (ret) + dev_err(rk808->dev, "Failed to shutdown device!\n"); + + return NOTIFY_DONE; +} + +static int rk808_restart(struct sys_off_data *data) +{ + struct rk808 *rk808 = data->cb_data; + unsigned int reg, bit; + int ret; + + switch (rk808->variant) { + case RK809_ID: + case RK817_ID: + reg = RK817_SYS_CFG(3); + bit = DEV_RST; + break; + + default: + return NOTIFY_DONE; + } + ret = regmap_update_bits(rk808->regmap, reg, bit, bit); + if (ret) + dev_err(rk808->dev, "Failed to restart device!\n"); + + return NOTIFY_DONE; +} + +void rk8xx_shutdown(struct device *dev) +{ + struct rk808 *rk808 = dev_get_drvdata(dev); + int ret; + + switch (rk808->variant) { + case RK805_ID: + ret = regmap_update_bits(rk808->regmap, + RK805_GPIO_IO_POL_REG, + SLP_SD_MSK, + SHUTDOWN_FUN); + break; + case RK809_ID: + case RK817_ID: + ret = regmap_update_bits(rk808->regmap, + RK817_SYS_CFG(3), + RK817_SLPPIN_FUNC_MSK, + SLPPIN_DN_FUN); + break; + default: + return; + } + if (ret) + dev_warn(dev, + "Cannot switch to power down function\n"); +} +EXPORT_SYMBOL_GPL(rk8xx_shutdown); + +int rk8xx_probe(struct device *dev, int variant, unsigned int irq, struct regmap *regmap) +{ + struct rk808 *rk808; + const struct rk808_reg_data *pre_init_reg; + const struct mfd_cell *cells; + int nr_pre_init_regs; + int nr_cells; + int ret; + int i; + + rk808 = devm_kzalloc(dev, sizeof(*rk808), GFP_KERNEL); + if (!rk808) + return -ENOMEM; + rk808->dev = dev; + rk808->variant = variant; + rk808->regmap = regmap; + dev_set_drvdata(dev, rk808); + + switch (rk808->variant) { + case RK805_ID: + rk808->regmap_irq_chip = &rk805_irq_chip; + pre_init_reg = rk805_pre_init_reg; + nr_pre_init_regs = ARRAY_SIZE(rk805_pre_init_reg); + cells = rk805s; + nr_cells = ARRAY_SIZE(rk805s); + break; + case RK808_ID: + rk808->regmap_irq_chip = &rk808_irq_chip; + pre_init_reg = rk808_pre_init_reg; + nr_pre_init_regs = ARRAY_SIZE(rk808_pre_init_reg); + cells = rk808s; + nr_cells = ARRAY_SIZE(rk808s); + break; + case RK818_ID: + rk808->regmap_irq_chip = &rk818_irq_chip; + pre_init_reg = rk818_pre_init_reg; + nr_pre_init_regs = ARRAY_SIZE(rk818_pre_init_reg); + cells = rk818s; + nr_cells = ARRAY_SIZE(rk818s); + break; + case RK809_ID: + case RK817_ID: + rk808->regmap_irq_chip = &rk817_irq_chip; + pre_init_reg = rk817_pre_init_reg; + nr_pre_init_regs = ARRAY_SIZE(rk817_pre_init_reg); + cells = rk817s; + nr_cells = ARRAY_SIZE(rk817s); + break; + default: + dev_err(dev, "Unsupported RK8XX ID %lu\n", rk808->variant); + return -EINVAL; + } + + dev_info(dev, "chip id: 0x%x\n", (unsigned int)rk808->variant); + + if (!irq) + return dev_err_probe(dev, -EINVAL, "No interrupt support, no core IRQ\n"); + + ret = devm_regmap_add_irq_chip(dev, rk808->regmap, irq, + IRQF_ONESHOT, -1, + rk808->regmap_irq_chip, &rk808->irq_data); + if (ret) + return dev_err_probe(dev, ret, "Failed to add irq_chip\n"); + + for (i = 0; i < nr_pre_init_regs; i++) { + ret = regmap_update_bits(rk808->regmap, + pre_init_reg[i].addr, + pre_init_reg[i].mask, + 
pre_init_reg[i].value); + if (ret) + return dev_err_probe(dev, ret, "0x%x write err\n", + pre_init_reg[i].addr); + } + + ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, + cells, nr_cells, NULL, 0, + regmap_irq_get_domain(rk808->irq_data)); + if (ret) + return dev_err_probe(dev, ret, "failed to add MFD devices\n"); + + if (device_property_read_bool(dev, "rockchip,system-power-controller")) { + ret = devm_register_sys_off_handler(dev, + SYS_OFF_MODE_POWER_OFF_PREPARE, SYS_OFF_PRIO_HIGH, + &rk808_power_off, rk808); + if (ret) + return dev_err_probe(dev, ret, + "failed to register poweroff handler\n"); + + switch (rk808->variant) { + case RK809_ID: + case RK817_ID: + ret = devm_register_sys_off_handler(dev, + SYS_OFF_MODE_RESTART, SYS_OFF_PRIO_HIGH, + &rk808_restart, rk808); + if (ret) + dev_warn(dev, "failed to register rst handler, %d\n", ret); + break; + default: + dev_dbg(dev, "pmic controlled board reset not supported\n"); + break; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(rk8xx_probe); + +int rk8xx_suspend(struct device *dev) +{ + struct rk808 *rk808 = dev_get_drvdata(dev); + int ret = 0; + + switch (rk808->variant) { + case RK805_ID: + ret = regmap_update_bits(rk808->regmap, + RK805_GPIO_IO_POL_REG, + SLP_SD_MSK, + SLEEP_FUN); + break; + case RK809_ID: + case RK817_ID: + ret = regmap_update_bits(rk808->regmap, + RK817_SYS_CFG(3), + RK817_SLPPIN_FUNC_MSK, + SLPPIN_SLP_FUN); + break; + default: + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(rk8xx_suspend); + +int rk8xx_resume(struct device *dev) +{ + struct rk808 *rk808 = dev_get_drvdata(dev); + int ret = 0; + + switch (rk808->variant) { + case RK809_ID: + case RK817_ID: + ret = regmap_update_bits(rk808->regmap, + RK817_SYS_CFG(3), + RK817_SLPPIN_FUNC_MSK, + SLPPIN_NULL_FUN); + break; + default: + break; + } + + return ret; +} +EXPORT_SYMBOL_GPL(rk8xx_resume); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chris Zhong "); +MODULE_AUTHOR("Zhang Qing "); +MODULE_AUTHOR("Wadim Egorov "); +MODULE_DESCRIPTION("RK8xx PMIC core"); diff --git a/drivers/mfd/rk8xx-i2c.c b/drivers/mfd/rk8xx-i2c.c new file mode 100644 index 000000000000..6d121b589fec --- /dev/null +++ b/drivers/mfd/rk8xx-i2c.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Rockchip RK808/RK818 Core (I2C) driver + * + * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd + * Copyright (C) 2016 PHYTEC Messtechnik GmbH + * + * Author: Chris Zhong + * Author: Zhang Qing + * Author: Wadim Egorov + */ + +#include +#include +#include +#include +#include + +static bool rk808_is_volatile_reg(struct device *dev, unsigned int reg) +{ + /* + * Notes: + * - Technically the ROUND_30s bit makes RTC_CTRL_REG volatile, but + * we don't use that feature. It's better to cache. + * - It's unlikely we care that RK808_DEVCTRL_REG is volatile since + * bits are cleared in case when we shutoff anyway, but better safe. + */ + + switch (reg) { + case RK808_SECONDS_REG ... RK808_WEEKS_REG: + case RK808_RTC_STATUS_REG: + case RK808_VB_MON_REG: + case RK808_THERMAL_REG: + case RK808_DCDC_UV_STS_REG: + case RK808_LDO_UV_STS_REG: + case RK808_DCDC_PG_REG: + case RK808_LDO_PG_REG: + case RK808_DEVCTRL_REG: + case RK808_INT_STS_REG1: + case RK808_INT_STS_REG2: + return true; + } + + return false; +} + +static bool rk817_is_volatile_reg(struct device *dev, unsigned int reg) +{ + /* + * Notes: + * - Technically the ROUND_30s bit makes RTC_CTRL_REG volatile, but + * we don't use that feature. It's better to cache. + */ + + switch (reg) { + case RK817_SECONDS_REG ... 
RK817_WEEKS_REG: + case RK817_RTC_STATUS_REG: + case RK817_CODEC_DTOP_LPT_SRST: + case RK817_GAS_GAUGE_ADC_CONFIG0 ... RK817_GAS_GAUGE_CUR_ADC_K0: + case RK817_PMIC_CHRG_STS: + case RK817_PMIC_CHRG_OUT: + case RK817_PMIC_CHRG_IN: + case RK817_INT_STS_REG0: + case RK817_INT_STS_REG1: + case RK817_INT_STS_REG2: + case RK817_SYS_STS: + return true; + } + + return false; +} + + +static const struct regmap_config rk818_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = RK818_USB_CTRL_REG, + .cache_type = REGCACHE_RBTREE, + .volatile_reg = rk808_is_volatile_reg, +}; + +static const struct regmap_config rk805_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = RK805_OFF_SOURCE_REG, + .cache_type = REGCACHE_RBTREE, + .volatile_reg = rk808_is_volatile_reg, +}; + +static const struct regmap_config rk808_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = RK808_IO_POL_REG, + .cache_type = REGCACHE_RBTREE, + .volatile_reg = rk808_is_volatile_reg, +}; + +static const struct regmap_config rk817_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = RK817_GPIO_INT_CFG, + .cache_type = REGCACHE_NONE, + .volatile_reg = rk817_is_volatile_reg, +}; + +static int rk8xx_i2c_get_variant(struct i2c_client *client) +{ + u8 pmic_id_msb, pmic_id_lsb; + int msb, lsb; + + if (of_device_is_compatible(client->dev.of_node, "rockchip,rk817") || + of_device_is_compatible(client->dev.of_node, "rockchip,rk809")) { + pmic_id_msb = RK817_ID_MSB; + pmic_id_lsb = RK817_ID_LSB; + } else { + pmic_id_msb = RK808_ID_MSB; + pmic_id_lsb = RK808_ID_LSB; + } + + /* Read chip variant */ + msb = i2c_smbus_read_byte_data(client, pmic_id_msb); + if (msb < 0) + return dev_err_probe(&client->dev, msb, "failed to read the chip id MSB\n"); + + lsb = i2c_smbus_read_byte_data(client, pmic_id_lsb); + if (lsb < 0) + return dev_err_probe(&client->dev, lsb, "failed to read the chip id LSB\n"); + + return ((msb << 8) | lsb) & RK8XX_ID_MSK; +} + +static int rk8xx_i2c_probe(struct i2c_client *client) +{ + const struct regmap_config *regmap_cfg; + struct regmap *regmap; + int variant; + + variant = rk8xx_i2c_get_variant(client); + if (variant < 0) + return variant; + + switch (variant) { + case RK805_ID: + regmap_cfg = &rk805_regmap_config; + break; + case RK808_ID: + regmap_cfg = &rk808_regmap_config; + break; + case RK818_ID: + regmap_cfg = &rk818_regmap_config; + break; + case RK809_ID: + case RK817_ID: + regmap_cfg = &rk817_regmap_config; + break; + default: + return dev_err_probe(&client->dev, -EINVAL, "Unsupported RK8XX ID %x\n", variant); + } + + regmap = devm_regmap_init_i2c(client, regmap_cfg); + if (IS_ERR(regmap)) + return dev_err_probe(&client->dev, PTR_ERR(regmap), + "regmap initialization failed\n"); + + return rk8xx_probe(&client->dev, variant, client->irq, regmap); +} + +static void rk8xx_i2c_shutdown(struct i2c_client *client) +{ + rk8xx_shutdown(&client->dev); +} + +static SIMPLE_DEV_PM_OPS(rk8xx_i2c_pm_ops, rk8xx_suspend, rk8xx_resume); + +static const struct of_device_id rk8xx_i2c_of_match[] = { + { .compatible = "rockchip,rk805" }, + { .compatible = "rockchip,rk808" }, + { .compatible = "rockchip,rk809" }, + { .compatible = "rockchip,rk817" }, + { .compatible = "rockchip,rk818" }, + { }, +}; +MODULE_DEVICE_TABLE(of, rk8xx_i2c_of_match); + +static struct i2c_driver rk8xx_i2c_driver = { + .driver = { + .name = "rk8xx-i2c", + .of_match_table = rk8xx_i2c_of_match, + .pm = &rk8xx_i2c_pm_ops, + }, + .probe_new = rk8xx_i2c_probe, + .shutdown = rk8xx_i2c_shutdown, +}; 
+module_i2c_driver(rk8xx_i2c_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chris Zhong "); +MODULE_AUTHOR("Zhang Qing "); +MODULE_AUTHOR("Wadim Egorov "); +MODULE_DESCRIPTION("RK8xx I2C PMIC driver"); diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index 5787c579dcf6..77ff9a641aeb 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -407,7 +407,7 @@ config PINCTRL_PISTACHIO config PINCTRL_RK805 tristate "Pinctrl and GPIO driver for RK805 PMIC" - depends on MFD_RK808 + depends on MFD_RK8XX select GPIOLIB select PINMUX select GENERIC_PINCONF diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index c78be9f322e6..4a5e8e1d1237 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -706,7 +706,7 @@ config CHARGER_BQ256XX config CHARGER_RK817 tristate "Rockchip RK817 PMIC Battery Charger" - depends on MFD_RK808 + depends on MFD_RK8XX help Say Y to include support for Rockchip RK817 Battery Charger. diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index e5f3613c15fa..f2881fe3e0a7 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -1056,7 +1056,7 @@ config REGULATOR_RC5T583 config REGULATOR_RK808 tristate "Rockchip RK805/RK808/RK809/RK817/RK818 Power regulators" - depends on MFD_RK808 + depends on MFD_RK8XX help Select this option to enable the power regulator of ROCKCHIP PMIC RK805,RK809&RK817,RK808 and RK818. diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 753872408615..ffca9a8bb878 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -395,7 +395,7 @@ config RTC_DRV_NCT3018Y config RTC_DRV_RK808 tristate "Rockchip RK805/RK808/RK809/RK817/RK818 RTC" - depends on MFD_RK808 + depends on MFD_RK8XX help If you say yes here you will get support for the RTC of RK805, RK809 and RK817, RK808 and RK818 PMIC. diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index a89ddd9ba68e..4183427a80fe 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -794,4 +794,10 @@ struct rk808 { const struct regmap_config *regmap_cfg; const struct regmap_irq_chip *regmap_irq_chip; }; + +void rk8xx_shutdown(struct device *dev); +int rk8xx_probe(struct device *dev, int variant, unsigned int irq, struct regmap *regmap); +int rk8xx_suspend(struct device *dev); +int rk8xx_resume(struct device *dev); + #endif /* __LINUX_REGULATOR_RK808_H */ diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 8020097d4e4c..0c4c5cbaa809 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1313,7 +1313,7 @@ config SND_SOC_RK3328 config SND_SOC_RK817 tristate "Rockchip RK817 audio CODEC" - depends on MFD_RK808 || COMPILE_TEST + depends on MFD_RK8XX || COMPILE_TEST config SND_SOC_RL6231 tristate -- cgit v1.2.3 From 210f418f8ace9f056c337f7945e0ae3e242b3389 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 4 May 2023 19:36:12 +0200 Subject: mfd: rk8xx: Add rk806 support Add support for SPI connected rk806, which is used by the RK3588 evaluation boards. The PMIC is advertised to support I2C and SPI, but the evaluation boards all use SPI. Thus only SPI support is added here. 
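With the core/bus split above, a front-end only has to build a regmap, determine the chip variant and IRQ, and hand them to rk8xx_probe(); shutdown is forwarded to rk8xx_shutdown(). The sketch below illustrates that contract with a hypothetical platform-device front-end; only rk8xx_probe(), rk8xx_shutdown() and RK805_ID come from this series, everything else is made up for illustration and real front-ends build an I2C or SPI regmap as rk8xx-i2c.c and rk8xx-spi.c do.

/* Hypothetical front-end sketch, not part of this series. */
#include <linux/mfd/rk808.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>

static int rk8xx_demo_probe(struct platform_device *pdev)
{
	struct regmap *regmap;
	int irq;

	/* Assume the parent device already registered a regmap for the PMIC. */
	regmap = dev_get_regmap(pdev->dev.parent, NULL);
	if (!regmap)
		return -ENODEV;

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	/* RK805_ID is only a placeholder variant for this sketch. */
	return rk8xx_probe(&pdev->dev, RK805_ID, irq, regmap);
}

static void rk8xx_demo_shutdown(struct platform_device *pdev)
{
	rk8xx_shutdown(&pdev->dev);
}

static struct platform_driver rk8xx_demo_driver = {
	.driver		= { .name = "rk8xx-demo" },
	.probe		= rk8xx_demo_probe,
	.shutdown	= rk8xx_demo_shutdown,
};
module_platform_driver(rk8xx_demo_driver);

MODULE_DESCRIPTION("Illustrative RK8xx front-end sketch");
MODULE_LICENSE("GPL");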
Tested-by: Diederik de Haas # Rock64, Quartz64 Model A + B Tested-by: Vincent Legoll # Pine64 QuartzPro64 Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20230504173618.142075-9-sebastian.reichel@collabora.com Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 14 ++ drivers/mfd/Makefile | 1 + drivers/mfd/rk8xx-core.c | 69 +++++++- drivers/mfd/rk8xx-spi.c | 124 ++++++++++++++ include/linux/mfd/rk808.h | 409 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 614 insertions(+), 3 deletions(-) create mode 100644 drivers/mfd/rk8xx-spi.c (limited to 'include/linux') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index de53e6c701fd..d4879cb4e1f6 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -1201,6 +1201,20 @@ config MFD_RK8XX_I2C through I2C interface. The device supports multiple sub-devices including interrupts, RTC, LDO & DCDC regulators, and onkey. +config MFD_RK8XX_SPI + tristate "Rockchip RK806 Power Management Chip" + depends on SPI && OF + select MFD_CORE + select REGMAP_SPI + select REGMAP_IRQ + select MFD_RK8XX + help + If you say yes here you get support for the RK806 Power Management + chip. + This driver provides common support for accessing the device + through an SPI interface. The device supports multiple sub-devices + including interrupts, LDO & DCDC regulators, and power on-key. + config MFD_RN5T618 tristate "Ricoh RN5T567/618 PMIC" depends on I2C diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index ba373193e999..4e666ef5b7fc 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -216,6 +216,7 @@ obj-$(CONFIG_MFD_NTXEC) += ntxec.o obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o obj-$(CONFIG_MFD_RK8XX) += rk8xx-core.o obj-$(CONFIG_MFD_RK8XX_I2C) += rk8xx-i2c.o +obj-$(CONFIG_MFD_RK8XX_SPI) += rk8xx-spi.o obj-$(CONFIG_MFD_RN5T618) += rn5t618.o obj-$(CONFIG_MFD_SEC_CORE) += sec-core.o sec-irq.o obj-$(CONFIG_MFD_SYSCON) += syscon.o diff --git a/drivers/mfd/rk8xx-core.c b/drivers/mfd/rk8xx-core.c index ddf2052c5190..e8fc9e2ab1d0 100644 --- a/drivers/mfd/rk8xx-core.c +++ b/drivers/mfd/rk8xx-core.c @@ -37,6 +37,11 @@ static const struct resource rk805_key_resources[] = { DEFINE_RES_IRQ(RK805_IRQ_PWRON_FALL), }; +static struct resource rk806_pwrkey_resources[] = { + DEFINE_RES_IRQ(RK806_IRQ_PWRON_FALL), + DEFINE_RES_IRQ(RK806_IRQ_PWRON_RISE), +}; + static const struct resource rk817_pwrkey_resources[] = { DEFINE_RES_IRQ(RK817_IRQ_PWRON_RISE), DEFINE_RES_IRQ(RK817_IRQ_PWRON_FALL), @@ -64,6 +69,17 @@ static const struct mfd_cell rk805s[] = { }, }; +static const struct mfd_cell rk806s[] = { + { .name = "rk805-pinctrl", .id = PLATFORM_DEVID_AUTO, }, + { .name = "rk808-regulator", .id = PLATFORM_DEVID_AUTO, }, + { + .name = "rk805-pwrkey", + .resources = rk806_pwrkey_resources, + .num_resources = ARRAY_SIZE(rk806_pwrkey_resources), + .id = PLATFORM_DEVID_AUTO, + }, +}; + static const struct mfd_cell rk808s[] = { { .name = "rk808-clkout", .id = PLATFORM_DEVID_NONE, }, { .name = "rk808-regulator", .id = PLATFORM_DEVID_NONE, }, @@ -123,6 +139,12 @@ static const struct rk808_reg_data rk805_pre_init_reg[] = { {RK805_THERMAL_REG, TEMP_HOTDIE_MSK, TEMP115C}, }; +static const struct rk808_reg_data rk806_pre_init_reg[] = { + { RK806_GPIO_INT_CONFIG, RK806_INT_POL_MSK, RK806_INT_POL_L }, + { RK806_SYS_CFG3, RK806_SLAVE_RESTART_FUN_MSK, RK806_SLAVE_RESTART_FUN_EN }, + { RK806_SYS_OPTION, RK806_SYS_ENB2_2M_MSK, RK806_SYS_ENB2_2M_EN }, +}; + static const struct rk808_reg_data rk808_pre_init_reg[] = { { RK808_BUCK3_CONFIG_REG, 
BUCK_ILMIN_MASK, BUCK_ILMIN_150MA }, { RK808_BUCK4_CONFIG_REG, BUCK_ILMIN_MASK, BUCK_ILMIN_200MA }, @@ -273,6 +295,27 @@ static const struct regmap_irq rk805_irqs[] = { }, }; +static const struct regmap_irq rk806_irqs[] = { + /* INT_STS0 IRQs */ + REGMAP_IRQ_REG(RK806_IRQ_PWRON_FALL, 0, RK806_INT_STS_PWRON_FALL), + REGMAP_IRQ_REG(RK806_IRQ_PWRON_RISE, 0, RK806_INT_STS_PWRON_RISE), + REGMAP_IRQ_REG(RK806_IRQ_PWRON, 0, RK806_INT_STS_PWRON), + REGMAP_IRQ_REG(RK806_IRQ_PWRON_LP, 0, RK806_INT_STS_PWRON_LP), + REGMAP_IRQ_REG(RK806_IRQ_HOTDIE, 0, RK806_INT_STS_HOTDIE), + REGMAP_IRQ_REG(RK806_IRQ_VDC_RISE, 0, RK806_INT_STS_VDC_RISE), + REGMAP_IRQ_REG(RK806_IRQ_VDC_FALL, 0, RK806_INT_STS_VDC_FALL), + REGMAP_IRQ_REG(RK806_IRQ_VB_LO, 0, RK806_INT_STS_VB_LO), + /* INT_STS1 IRQs */ + REGMAP_IRQ_REG(RK806_IRQ_REV0, 1, RK806_INT_STS_REV0), + REGMAP_IRQ_REG(RK806_IRQ_REV1, 1, RK806_INT_STS_REV1), + REGMAP_IRQ_REG(RK806_IRQ_REV2, 1, RK806_INT_STS_REV2), + REGMAP_IRQ_REG(RK806_IRQ_CRC_ERROR, 1, RK806_INT_STS_CRC_ERROR), + REGMAP_IRQ_REG(RK806_IRQ_SLP3_GPIO, 1, RK806_INT_STS_SLP3_GPIO), + REGMAP_IRQ_REG(RK806_IRQ_SLP2_GPIO, 1, RK806_INT_STS_SLP2_GPIO), + REGMAP_IRQ_REG(RK806_IRQ_SLP1_GPIO, 1, RK806_INT_STS_SLP1_GPIO), + REGMAP_IRQ_REG(RK806_IRQ_WDT, 1, RK806_INT_STS_WDT), +}; + static const struct regmap_irq rk808_irqs[] = { /* INT_STS */ [RK808_IRQ_VOUT_LO] = { @@ -423,6 +466,18 @@ static struct regmap_irq_chip rk805_irq_chip = { .init_ack_masked = true, }; +static struct regmap_irq_chip rk806_irq_chip = { + .name = "rk806", + .irqs = rk806_irqs, + .num_irqs = ARRAY_SIZE(rk806_irqs), + .num_regs = 2, + .irq_reg_stride = 2, + .mask_base = RK806_INT_MSK0, + .status_base = RK806_INT_STS0, + .ack_base = RK806_INT_STS0, + .init_ack_masked = true, +}; + static const struct regmap_irq_chip rk808_irq_chip = { .name = "rk808", .irqs = rk808_irqs, @@ -549,6 +604,7 @@ int rk8xx_probe(struct device *dev, int variant, unsigned int irq, struct regmap struct rk808 *rk808; const struct rk808_reg_data *pre_init_reg; const struct mfd_cell *cells; + int dual_support = 0; int nr_pre_init_regs; int nr_cells; int ret; @@ -570,6 +626,14 @@ int rk8xx_probe(struct device *dev, int variant, unsigned int irq, struct regmap cells = rk805s; nr_cells = ARRAY_SIZE(rk805s); break; + case RK806_ID: + rk808->regmap_irq_chip = &rk806_irq_chip; + pre_init_reg = rk806_pre_init_reg; + nr_pre_init_regs = ARRAY_SIZE(rk806_pre_init_reg); + cells = rk806s; + nr_cells = ARRAY_SIZE(rk806s); + dual_support = IRQF_SHARED; + break; case RK808_ID: rk808->regmap_irq_chip = &rk808_irq_chip; pre_init_reg = rk808_pre_init_reg; @@ -601,7 +665,7 @@ int rk8xx_probe(struct device *dev, int variant, unsigned int irq, struct regmap return dev_err_probe(dev, -EINVAL, "No interrupt support, no core IRQ\n"); ret = devm_regmap_add_irq_chip(dev, rk808->regmap, irq, - IRQF_ONESHOT, -1, + IRQF_ONESHOT | dual_support, -1, rk808->regmap_irq_chip, &rk808->irq_data); if (ret) return dev_err_probe(dev, ret, "Failed to add irq_chip\n"); @@ -616,8 +680,7 @@ int rk8xx_probe(struct device *dev, int variant, unsigned int irq, struct regmap pre_init_reg[i].addr); } - ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, - cells, nr_cells, NULL, 0, + ret = devm_mfd_add_devices(dev, 0, cells, nr_cells, NULL, 0, regmap_irq_get_domain(rk808->irq_data)); if (ret) return dev_err_probe(dev, ret, "failed to add MFD devices\n"); diff --git a/drivers/mfd/rk8xx-spi.c b/drivers/mfd/rk8xx-spi.c new file mode 100644 index 000000000000..fd137f38c2c4 --- /dev/null +++ b/drivers/mfd/rk8xx-spi.c @@ 
-0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Rockchip RK806 Core (SPI) driver + * + * Copyright (c) 2021 Rockchip Electronics Co., Ltd. + * Copyright (c) 2023 Collabora Ltd. + * + * Author: Xu Shengfei + * Author: Sebastian Reichel + */ + +#include +#include +#include +#include +#include +#include + +#define RK806_ADDR_SIZE 2 +#define RK806_CMD_WITH_SIZE(CMD, VALUE_BYTES) \ + (RK806_CMD_##CMD | RK806_CMD_CRC_DIS | (VALUE_BYTES - 1)) + +static const struct regmap_range rk806_volatile_ranges[] = { + regmap_reg_range(RK806_POWER_EN0, RK806_POWER_EN5), + regmap_reg_range(RK806_DVS_START_CTRL, RK806_INT_MSK1), +}; + +static const struct regmap_access_table rk806_volatile_table = { + .yes_ranges = rk806_volatile_ranges, + .n_yes_ranges = ARRAY_SIZE(rk806_volatile_ranges), +}; + +static const struct regmap_config rk806_regmap_config_spi = { + .reg_bits = 16, + .val_bits = 8, + .max_register = RK806_BUCK_RSERVE_REG5, + .cache_type = REGCACHE_RBTREE, + .volatile_table = &rk806_volatile_table, +}; + +static int rk806_spi_bus_write(void *context, const void *vdata, size_t count) +{ + struct device *dev = context; + struct spi_device *spi = to_spi_device(dev); + struct spi_transfer xfer[2] = { 0 }; + /* data and thus count includes the register address */ + size_t val_size = count - RK806_ADDR_SIZE; + char cmd; + + if (val_size < 1 || val_size > (RK806_CMD_LEN_MSK + 1)) + return -EINVAL; + + cmd = RK806_CMD_WITH_SIZE(WRITE, val_size); + + xfer[0].tx_buf = &cmd; + xfer[0].len = sizeof(cmd); + xfer[1].tx_buf = vdata; + xfer[1].len = count; + + return spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer)); +} + +static int rk806_spi_bus_read(void *context, const void *vreg, size_t reg_size, + void *val, size_t val_size) +{ + struct device *dev = context; + struct spi_device *spi = to_spi_device(dev); + char txbuf[3] = { 0 }; + + if (reg_size != RK806_ADDR_SIZE || + val_size < 1 || val_size > (RK806_CMD_LEN_MSK + 1)) + return -EINVAL; + + /* TX buffer contains command byte followed by two address bytes */ + txbuf[0] = RK806_CMD_WITH_SIZE(READ, val_size); + memcpy(txbuf+1, vreg, reg_size); + + return spi_write_then_read(spi, txbuf, sizeof(txbuf), val, val_size); +} + +static const struct regmap_bus rk806_regmap_bus_spi = { + .write = rk806_spi_bus_write, + .read = rk806_spi_bus_read, + .reg_format_endian_default = REGMAP_ENDIAN_LITTLE, +}; + +static int rk8xx_spi_probe(struct spi_device *spi) +{ + struct regmap *regmap; + + regmap = devm_regmap_init(&spi->dev, &rk806_regmap_bus_spi, + &spi->dev, &rk806_regmap_config_spi); + if (IS_ERR(regmap)) + return dev_err_probe(&spi->dev, PTR_ERR(regmap), + "Failed to init regmap\n"); + + return rk8xx_probe(&spi->dev, RK806_ID, spi->irq, regmap); +} + +static const struct of_device_id rk8xx_spi_of_match[] = { + { .compatible = "rockchip,rk806", }, + { } +}; +MODULE_DEVICE_TABLE(of, rk8xx_spi_of_match); + +static const struct spi_device_id rk8xx_spi_id_table[] = { + { "rk806", 0 }, + { } +}; +MODULE_DEVICE_TABLE(spi, rk8xx_spi_id_table); + +static struct spi_driver rk8xx_spi_driver = { + .driver = { + .name = "rk8xx-spi", + .of_match_table = rk8xx_spi_of_match, + }, + .probe = rk8xx_spi_probe, + .id_table = rk8xx_spi_id_table, +}; +module_spi_driver(rk8xx_spi_driver); + +MODULE_AUTHOR("Xu Shengfei "); +MODULE_DESCRIPTION("RK8xx SPI PMIC driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index 4183427a80fe..78e167a92483 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -289,6 +289,414 @@ 
enum rk805_reg { #define RK805_INT_ALARM_EN (1 << 3) #define RK805_INT_TIMER_EN (1 << 2) +/* RK806 */ +#define RK806_POWER_EN0 0x0 +#define RK806_POWER_EN1 0x1 +#define RK806_POWER_EN2 0x2 +#define RK806_POWER_EN3 0x3 +#define RK806_POWER_EN4 0x4 +#define RK806_POWER_EN5 0x5 +#define RK806_POWER_SLP_EN0 0x6 +#define RK806_POWER_SLP_EN1 0x7 +#define RK806_POWER_SLP_EN2 0x8 +#define RK806_POWER_DISCHRG_EN0 0x9 +#define RK806_POWER_DISCHRG_EN1 0xA +#define RK806_POWER_DISCHRG_EN2 0xB +#define RK806_BUCK_FB_CONFIG 0xC +#define RK806_SLP_LP_CONFIG 0xD +#define RK806_POWER_FPWM_EN0 0xE +#define RK806_POWER_FPWM_EN1 0xF +#define RK806_BUCK1_CONFIG 0x10 +#define RK806_BUCK2_CONFIG 0x11 +#define RK806_BUCK3_CONFIG 0x12 +#define RK806_BUCK4_CONFIG 0x13 +#define RK806_BUCK5_CONFIG 0x14 +#define RK806_BUCK6_CONFIG 0x15 +#define RK806_BUCK7_CONFIG 0x16 +#define RK806_BUCK8_CONFIG 0x17 +#define RK806_BUCK9_CONFIG 0x18 +#define RK806_BUCK10_CONFIG 0x19 +#define RK806_BUCK1_ON_VSEL 0x1A +#define RK806_BUCK2_ON_VSEL 0x1B +#define RK806_BUCK3_ON_VSEL 0x1C +#define RK806_BUCK4_ON_VSEL 0x1D +#define RK806_BUCK5_ON_VSEL 0x1E +#define RK806_BUCK6_ON_VSEL 0x1F +#define RK806_BUCK7_ON_VSEL 0x20 +#define RK806_BUCK8_ON_VSEL 0x21 +#define RK806_BUCK9_ON_VSEL 0x22 +#define RK806_BUCK10_ON_VSEL 0x23 +#define RK806_BUCK1_SLP_VSEL 0x24 +#define RK806_BUCK2_SLP_VSEL 0x25 +#define RK806_BUCK3_SLP_VSEL 0x26 +#define RK806_BUCK4_SLP_VSEL 0x27 +#define RK806_BUCK5_SLP_VSEL 0x28 +#define RK806_BUCK6_SLP_VSEL 0x29 +#define RK806_BUCK7_SLP_VSEL 0x2A +#define RK806_BUCK8_SLP_VSEL 0x2B +#define RK806_BUCK9_SLP_VSEL 0x2D +#define RK806_BUCK10_SLP_VSEL 0x2E +#define RK806_BUCK_DEBUG1 0x30 +#define RK806_BUCK_DEBUG2 0x31 +#define RK806_BUCK_DEBUG3 0x32 +#define RK806_BUCK_DEBUG4 0x33 +#define RK806_BUCK_DEBUG5 0x34 +#define RK806_BUCK_DEBUG6 0x35 +#define RK806_BUCK_DEBUG7 0x36 +#define RK806_BUCK_DEBUG8 0x37 +#define RK806_BUCK_DEBUG9 0x38 +#define RK806_BUCK_DEBUG10 0x39 +#define RK806_BUCK_DEBUG11 0x3A +#define RK806_BUCK_DEBUG12 0x3B +#define RK806_BUCK_DEBUG13 0x3C +#define RK806_BUCK_DEBUG14 0x3D +#define RK806_BUCK_DEBUG15 0x3E +#define RK806_BUCK_DEBUG16 0x3F +#define RK806_BUCK_DEBUG17 0x40 +#define RK806_BUCK_DEBUG18 0x41 +#define RK806_NLDO_IMAX 0x42 +#define RK806_NLDO1_ON_VSEL 0x43 +#define RK806_NLDO2_ON_VSEL 0x44 +#define RK806_NLDO3_ON_VSEL 0x45 +#define RK806_NLDO4_ON_VSEL 0x46 +#define RK806_NLDO5_ON_VSEL 0x47 +#define RK806_NLDO1_SLP_VSEL 0x48 +#define RK806_NLDO2_SLP_VSEL 0x49 +#define RK806_NLDO3_SLP_VSEL 0x4A +#define RK806_NLDO4_SLP_VSEL 0x4B +#define RK806_NLDO5_SLP_VSEL 0x4C +#define RK806_PLDO_IMAX 0x4D +#define RK806_PLDO1_ON_VSEL 0x4E +#define RK806_PLDO2_ON_VSEL 0x4F +#define RK806_PLDO3_ON_VSEL 0x50 +#define RK806_PLDO4_ON_VSEL 0x51 +#define RK806_PLDO5_ON_VSEL 0x52 +#define RK806_PLDO6_ON_VSEL 0x53 +#define RK806_PLDO1_SLP_VSEL 0x54 +#define RK806_PLDO2_SLP_VSEL 0x55 +#define RK806_PLDO3_SLP_VSEL 0x56 +#define RK806_PLDO4_SLP_VSEL 0x57 +#define RK806_PLDO5_SLP_VSEL 0x58 +#define RK806_PLDO6_SLP_VSEL 0x59 +#define RK806_CHIP_NAME 0x5A +#define RK806_CHIP_VER 0x5B +#define RK806_OTP_VER 0x5C +#define RK806_SYS_STS 0x5D +#define RK806_SYS_CFG0 0x5E +#define RK806_SYS_CFG1 0x5F +#define RK806_SYS_OPTION 0x61 +#define RK806_SLEEP_CONFIG0 0x62 +#define RK806_SLEEP_CONFIG1 0x63 +#define RK806_SLEEP_CTR_SEL0 0x64 +#define RK806_SLEEP_CTR_SEL1 0x65 +#define RK806_SLEEP_CTR_SEL2 0x66 +#define RK806_SLEEP_CTR_SEL3 0x67 +#define RK806_SLEEP_CTR_SEL4 0x68 +#define RK806_SLEEP_CTR_SEL5 0x69 +#define 
RK806_DVS_CTRL_SEL0 0x6A +#define RK806_DVS_CTRL_SEL1 0x6B +#define RK806_DVS_CTRL_SEL2 0x6C +#define RK806_DVS_CTRL_SEL3 0x6D +#define RK806_DVS_CTRL_SEL4 0x6E +#define RK806_DVS_CTRL_SEL5 0x6F +#define RK806_DVS_START_CTRL 0x70 +#define RK806_SLEEP_GPIO 0x71 +#define RK806_SYS_CFG3 0x72 +#define RK806_ON_SOURCE 0x74 +#define RK806_OFF_SOURCE 0x75 +#define RK806_PWRON_KEY 0x76 +#define RK806_INT_STS0 0x77 +#define RK806_INT_MSK0 0x78 +#define RK806_INT_STS1 0x79 +#define RK806_INT_MSK1 0x7A +#define RK806_GPIO_INT_CONFIG 0x7B +#define RK806_DATA_REG0 0x7C +#define RK806_DATA_REG1 0x7D +#define RK806_DATA_REG2 0x7E +#define RK806_DATA_REG3 0x7F +#define RK806_DATA_REG4 0x80 +#define RK806_DATA_REG5 0x81 +#define RK806_DATA_REG6 0x82 +#define RK806_DATA_REG7 0x83 +#define RK806_DATA_REG8 0x84 +#define RK806_DATA_REG9 0x85 +#define RK806_DATA_REG10 0x86 +#define RK806_DATA_REG11 0x87 +#define RK806_DATA_REG12 0x88 +#define RK806_DATA_REG13 0x89 +#define RK806_DATA_REG14 0x8A +#define RK806_DATA_REG15 0x8B +#define RK806_TM_REG 0x8C +#define RK806_OTP_EN_REG 0x8D +#define RK806_FUNC_OTP_EN_REG 0x8E +#define RK806_TEST_REG1 0x8F +#define RK806_TEST_REG2 0x90 +#define RK806_TEST_REG3 0x91 +#define RK806_TEST_REG4 0x92 +#define RK806_TEST_REG5 0x93 +#define RK806_BUCK_VSEL_OTP_REG0 0x94 +#define RK806_BUCK_VSEL_OTP_REG1 0x95 +#define RK806_BUCK_VSEL_OTP_REG2 0x96 +#define RK806_BUCK_VSEL_OTP_REG3 0x97 +#define RK806_BUCK_VSEL_OTP_REG4 0x98 +#define RK806_BUCK_VSEL_OTP_REG5 0x99 +#define RK806_BUCK_VSEL_OTP_REG6 0x9A +#define RK806_BUCK_VSEL_OTP_REG7 0x9B +#define RK806_BUCK_VSEL_OTP_REG8 0x9C +#define RK806_BUCK_VSEL_OTP_REG9 0x9D +#define RK806_NLDO1_VSEL_OTP_REG0 0x9E +#define RK806_NLDO1_VSEL_OTP_REG1 0x9F +#define RK806_NLDO1_VSEL_OTP_REG2 0xA0 +#define RK806_NLDO1_VSEL_OTP_REG3 0xA1 +#define RK806_NLDO1_VSEL_OTP_REG4 0xA2 +#define RK806_PLDO_VSEL_OTP_REG0 0xA3 +#define RK806_PLDO_VSEL_OTP_REG1 0xA4 +#define RK806_PLDO_VSEL_OTP_REG2 0xA5 +#define RK806_PLDO_VSEL_OTP_REG3 0xA6 +#define RK806_PLDO_VSEL_OTP_REG4 0xA7 +#define RK806_PLDO_VSEL_OTP_REG5 0xA8 +#define RK806_BUCK_EN_OTP_REG1 0xA9 +#define RK806_NLDO_EN_OTP_REG1 0xAA +#define RK806_PLDO_EN_OTP_REG1 0xAB +#define RK806_BUCK_FB_RES_OTP_REG1 0xAC +#define RK806_OTP_RESEV_REG0 0xAD +#define RK806_OTP_RESEV_REG1 0xAE +#define RK806_OTP_RESEV_REG2 0xAF +#define RK806_OTP_RESEV_REG3 0xB0 +#define RK806_OTP_RESEV_REG4 0xB1 +#define RK806_BUCK_SEQ_REG0 0xB2 +#define RK806_BUCK_SEQ_REG1 0xB3 +#define RK806_BUCK_SEQ_REG2 0xB4 +#define RK806_BUCK_SEQ_REG3 0xB5 +#define RK806_BUCK_SEQ_REG4 0xB6 +#define RK806_BUCK_SEQ_REG5 0xB7 +#define RK806_BUCK_SEQ_REG6 0xB8 +#define RK806_BUCK_SEQ_REG7 0xB9 +#define RK806_BUCK_SEQ_REG8 0xBA +#define RK806_BUCK_SEQ_REG9 0xBB +#define RK806_BUCK_SEQ_REG10 0xBC +#define RK806_BUCK_SEQ_REG11 0xBD +#define RK806_BUCK_SEQ_REG12 0xBE +#define RK806_BUCK_SEQ_REG13 0xBF +#define RK806_BUCK_SEQ_REG14 0xC0 +#define RK806_BUCK_SEQ_REG15 0xC1 +#define RK806_BUCK_SEQ_REG16 0xC2 +#define RK806_BUCK_SEQ_REG17 0xC3 +#define RK806_HK_TRIM_REG1 0xC4 +#define RK806_HK_TRIM_REG2 0xC5 +#define RK806_BUCK_REF_TRIM_REG1 0xC6 +#define RK806_BUCK_REF_TRIM_REG2 0xC7 +#define RK806_BUCK_REF_TRIM_REG3 0xC8 +#define RK806_BUCK_REF_TRIM_REG4 0xC9 +#define RK806_BUCK_REF_TRIM_REG5 0xCA +#define RK806_BUCK_OSC_TRIM_REG1 0xCB +#define RK806_BUCK_OSC_TRIM_REG2 0xCC +#define RK806_BUCK_OSC_TRIM_REG3 0xCD +#define RK806_BUCK_OSC_TRIM_REG4 0xCE +#define RK806_BUCK_OSC_TRIM_REG5 0xCF +#define RK806_BUCK_TRIM_ZCDIOS_REG1 0xD0 +#define 
RK806_BUCK_TRIM_ZCDIOS_REG2 0xD1 +#define RK806_NLDO_TRIM_REG1 0xD2 +#define RK806_NLDO_TRIM_REG2 0xD3 +#define RK806_NLDO_TRIM_REG3 0xD4 +#define RK806_PLDO_TRIM_REG1 0xD5 +#define RK806_PLDO_TRIM_REG2 0xD6 +#define RK806_PLDO_TRIM_REG3 0xD7 +#define RK806_TRIM_ICOMP_REG1 0xD8 +#define RK806_TRIM_ICOMP_REG2 0xD9 +#define RK806_EFUSE_CONTROL_REGH 0xDA +#define RK806_FUSE_PROG_REG 0xDB +#define RK806_MAIN_FSM_STS_REG 0xDD +#define RK806_FSM_REG 0xDE +#define RK806_TOP_RESEV_OFFR 0xEC +#define RK806_TOP_RESEV_POR 0xED +#define RK806_BUCK_VRSN_REG1 0xEE +#define RK806_BUCK_VRSN_REG2 0xEF +#define RK806_NLDO_RLOAD_SEL_REG1 0xF0 +#define RK806_PLDO_RLOAD_SEL_REG1 0xF1 +#define RK806_PLDO_RLOAD_SEL_REG2 0xF2 +#define RK806_BUCK_CMIN_MX_REG1 0xF3 +#define RK806_BUCK_CMIN_MX_REG2 0xF4 +#define RK806_BUCK_FREQ_SET_REG1 0xF5 +#define RK806_BUCK_FREQ_SET_REG2 0xF6 +#define RK806_BUCK_RS_MEABS_REG1 0xF7 +#define RK806_BUCK_RS_MEABS_REG2 0xF8 +#define RK806_BUCK_RS_ZDLEB_REG1 0xF9 +#define RK806_BUCK_RS_ZDLEB_REG2 0xFA +#define RK806_BUCK_RSERVE_REG1 0xFB +#define RK806_BUCK_RSERVE_REG2 0xFC +#define RK806_BUCK_RSERVE_REG3 0xFD +#define RK806_BUCK_RSERVE_REG4 0xFE +#define RK806_BUCK_RSERVE_REG5 0xFF + +/* INT_STS Register field definitions */ +#define RK806_INT_STS_PWRON_FALL BIT(0) +#define RK806_INT_STS_PWRON_RISE BIT(1) +#define RK806_INT_STS_PWRON BIT(2) +#define RK806_INT_STS_PWRON_LP BIT(3) +#define RK806_INT_STS_HOTDIE BIT(4) +#define RK806_INT_STS_VDC_RISE BIT(5) +#define RK806_INT_STS_VDC_FALL BIT(6) +#define RK806_INT_STS_VB_LO BIT(7) +#define RK806_INT_STS_REV0 BIT(0) +#define RK806_INT_STS_REV1 BIT(1) +#define RK806_INT_STS_REV2 BIT(2) +#define RK806_INT_STS_CRC_ERROR BIT(3) +#define RK806_INT_STS_SLP3_GPIO BIT(4) +#define RK806_INT_STS_SLP2_GPIO BIT(5) +#define RK806_INT_STS_SLP1_GPIO BIT(6) +#define RK806_INT_STS_WDT BIT(7) + +/* SPI command */ +#define RK806_CMD_READ 0 +#define RK806_CMD_WRITE BIT(7) +#define RK806_CMD_CRC_EN BIT(6) +#define RK806_CMD_CRC_DIS 0 +#define RK806_CMD_LEN_MSK 0x0f +#define RK806_REG_H 0x00 + +#define VERSION_AB 0x01 + +enum rk806_reg_id { + RK806_ID_DCDC1 = 0, + RK806_ID_DCDC2, + RK806_ID_DCDC3, + RK806_ID_DCDC4, + RK806_ID_DCDC5, + RK806_ID_DCDC6, + RK806_ID_DCDC7, + RK806_ID_DCDC8, + RK806_ID_DCDC9, + RK806_ID_DCDC10, + + RK806_ID_NLDO1, + RK806_ID_NLDO2, + RK806_ID_NLDO3, + RK806_ID_NLDO4, + RK806_ID_NLDO5, + + RK806_ID_PLDO1, + RK806_ID_PLDO2, + RK806_ID_PLDO3, + RK806_ID_PLDO4, + RK806_ID_PLDO5, + RK806_ID_PLDO6, + RK806_ID_END, +}; + +/* Define the RK806 IRQ numbers */ +enum rk806_irqs { + /* INT_STS0 registers */ + RK806_IRQ_PWRON_FALL, + RK806_IRQ_PWRON_RISE, + RK806_IRQ_PWRON, + RK806_IRQ_PWRON_LP, + RK806_IRQ_HOTDIE, + RK806_IRQ_VDC_RISE, + RK806_IRQ_VDC_FALL, + RK806_IRQ_VB_LO, + + /* INT_STS0 registers */ + RK806_IRQ_REV0, + RK806_IRQ_REV1, + RK806_IRQ_REV2, + RK806_IRQ_CRC_ERROR, + RK806_IRQ_SLP3_GPIO, + RK806_IRQ_SLP2_GPIO, + RK806_IRQ_SLP1_GPIO, + RK806_IRQ_WDT, +}; + +/* VCC1 Low Voltage Threshold */ +enum rk806_lv_sel { + VB_LO_SEL_2800, + VB_LO_SEL_2900, + VB_LO_SEL_3000, + VB_LO_SEL_3100, + VB_LO_SEL_3200, + VB_LO_SEL_3300, + VB_LO_SEL_3400, + VB_LO_SEL_3500, +}; + +/* System Shutdown Voltage Select */ +enum rk806_uv_sel { + VB_UV_SEL_2700, + VB_UV_SEL_2800, + VB_UV_SEL_2900, + VB_UV_SEL_3000, + VB_UV_SEL_3100, + VB_UV_SEL_3200, + VB_UV_SEL_3300, + VB_UV_SEL_3400, +}; + +/* Pin Function */ +enum rk806_pwrctrl_fun { + PWRCTRL_NULL_FUN, + PWRCTRL_SLP_FUN, + PWRCTRL_POWOFF_FUN, + PWRCTRL_RST_FUN, + PWRCTRL_DVS_FUN, + PWRCTRL_GPIO_FUN, +}; + 
+/* Pin Polarity */ +enum rk806_pin_level { + POL_LOW, + POL_HIGH, +}; + +enum rk806_vsel_ctr_sel { + CTR_BY_NO_EFFECT, + CTR_BY_PWRCTRL1, + CTR_BY_PWRCTRL2, + CTR_BY_PWRCTRL3, +}; + +enum rk806_dvs_ctr_sel { + CTR_SEL_NO_EFFECT, + CTR_SEL_DVS_START1, + CTR_SEL_DVS_START2, + CTR_SEL_DVS_START3, +}; + +enum rk806_pin_dr_sel { + RK806_PIN_INPUT, + RK806_PIN_OUTPUT, +}; + +#define RK806_INT_POL_MSK BIT(1) +#define RK806_INT_POL_H BIT(1) +#define RK806_INT_POL_L 0 + +#define RK806_SLAVE_RESTART_FUN_MSK BIT(1) +#define RK806_SLAVE_RESTART_FUN_EN BIT(1) +#define RK806_SLAVE_RESTART_FUN_OFF 0 + +#define RK806_SYS_ENB2_2M_MSK BIT(1) +#define RK806_SYS_ENB2_2M_EN BIT(1) +#define RK806_SYS_ENB2_2M_OFF 0 + +enum rk806_int_fun { + RK806_INT_ONLY, + RK806_INT_ADN_WKUP, +}; + +enum rk806_dvs_mode { + RK806_DVS_NOT_SUPPORT, + RK806_DVS_START1, + RK806_DVS_START2, + RK806_DVS_START3, + RK806_DVS_PWRCTRL1, + RK806_DVS_PWRCTRL2, + RK806_DVS_PWRCTRL3, + RK806_DVS_START_PWRCTR1, + RK806_DVS_START_PWRCTR2, + RK806_DVS_START_PWRCTR3, + RK806_DVS_END, +}; + /* RK808 IRQ Definitions */ #define RK808_IRQ_VOUT_LO 0 #define RK808_IRQ_VB_LO 1 @@ -780,6 +1188,7 @@ enum { enum { RK805_ID = 0x8050, + RK806_ID = 0x8060, RK808_ID = 0x0000, RK809_ID = 0x8090, RK817_ID = 0x8170, -- cgit v1.2.3 From 47e79cbeea4b3891ad476047f4c68543eb51c8e0 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Mon, 15 May 2023 13:08:48 +0000 Subject: bpf: Remove bpf trampoline selector After commit e21aa341785c ("bpf: Fix fexit trampoline."), the selector is only used to indicate how many times the bpf trampoline image has been updated; it is also displayed in the trampoline ksym name. After the trampoline is freed, the selector will start from 0 again. So the selector is a useless value to the user. We can remove it. If the user wants to check whether the bpf trampoline image has been updated or not, the user can compare the address. Each time the trampoline image is updated, the address will change accordingly. Jiri also pointed out another issue: perf is still using the old name "bpf_trampoline_%lu", so this change also fixes that issue in perf.
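The practical effect is visible in /proc/kallsyms: the trampoline now shows up as "bpf_trampoline_<key>" with no trailing selector, and a rebuilt image is recognizable by its changed address rather than by a new suffix. A minimal userspace sketch (not part of this patch) that lists those symbols could look like this; reading non-zero addresses requires sufficient privileges (or kptr_restrict=0):

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/kallsyms", "r");
	char line[512];

	if (!f) {
		perror("fopen /proc/kallsyms");
		return 1;
	}
	/* Each line is "<address> <type> <name> [module]". */
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, " bpf_trampoline_"))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}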
Fixes: e21aa341785c ("bpf: Fix fexit trampoline.") Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Acked-by: Song Liu Cc: Jiri Olsa Link: https://lore.kernel.org/bpf/ZFvOOlrmHiY9AgXE@krava Link: https://lore.kernel.org/bpf/20230515130849.57502-3-laoar.shao@gmail.com --- include/linux/bpf.h | 1 - kernel/bpf/trampoline.c | 11 ++++------- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 456f33b9d205..36e4b2d8cca2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1125,7 +1125,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - u64 selector; struct module *mod; }; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ac021bc43a66..84850e66ce3d 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -344,7 +344,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im) call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks); } -static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) +static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key) { struct bpf_tramp_image *im; struct bpf_ksym *ksym; @@ -371,7 +371,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) ksym = &im->ksym; INIT_LIST_HEAD_RCU(&ksym->lnode); - snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx); + snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key); bpf_image_ksym_add(image, ksym); return im; @@ -401,11 +401,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut err = unregister_fentry(tr, tr->cur_image->image); bpf_tramp_image_put(tr->cur_image); tr->cur_image = NULL; - tr->selector = 0; goto out; } - im = bpf_tramp_image_alloc(tr->key, tr->selector); + im = bpf_tramp_image_alloc(tr->key); if (IS_ERR(im)) { err = PTR_ERR(im); goto out; @@ -442,8 +441,7 @@ again: set_memory_rox((long)im->image, 1); - WARN_ON(tr->cur_image && tr->selector == 0); - WARN_ON(!tr->cur_image && tr->selector); + WARN_ON(tr->cur_image && total == 0); if (tr->cur_image) /* progs already running at this address */ err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex); @@ -473,7 +471,6 @@ again: if (tr->cur_image) bpf_tramp_image_put(tr->cur_image); tr->cur_image = im; - tr->selector++; out: /* If any error happens, restore previous flags */ if (err) -- cgit v1.2.3 From 6882011e8854c6cb227770fccb57ed70a88a716f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 23 Apr 2023 13:06:39 +0200 Subject: can: length: make header self contained Include the headers that "can/length.h" depends on. 
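As a hedged illustration of what "self contained" buys us, the hypothetical module below
should now build with "can/length.h" pulled in as the first and only CAN-related include;
CAN_FRAME_LEN_MAX is assumed to be one of the length macros the header provides.

// SPDX-License-Identifier: GPL-2.0
#include <linux/can/length.h>	/* no other CAN header needed beforehand */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/printk.h>

static int __init can_len_check_init(void)
{
	/* CAN_FRAME_LEN_MAX is assumed to be provided by can/length.h. */
	pr_info("classical CAN max frame length: %u\n",
		(unsigned int)CAN_FRAME_LEN_MAX);
	return 0;
}
module_init(can_len_check_init);

static void __exit can_len_check_exit(void)
{
}
module_exit(can_len_check_exit);

MODULE_DESCRIPTION("can/length.h self-containedness check");
MODULE_LICENSE("GPL");
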
Fixes: bdd2e413192d ("can: dev: move length related code into seperate file") Link: https://lore.kernel.org/all/20230509122854.350426-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/length.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/length.h b/include/linux/can/length.h index 6995092b774e..69336549d24f 100644 --- a/include/linux/can/length.h +++ b/include/linux/can/length.h @@ -6,6 +6,9 @@ #ifndef _CAN_LENGTH_H #define _CAN_LENGTH_H +#include +#include + /* * Size of a Classical CAN Standard Frame * -- cgit v1.2.3 From 8b33485128ad932f807f4535e0b440733d8b5808 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Mon, 15 May 2023 13:01:07 +0800 Subject: net: skbuff: update comment about pfmemalloc propagating __skb_fill_page_desc_noacc() is not doing any pfmemalloc propagating, and yet it has a comment about that, commit 84ce071e38a6 ("net: introduce __skb_fill_page_desc_noacc") may have accidentally moved it to __skb_fill_page_desc_noacc(), so move it back to __skb_fill_page_desc() which is supposed to be doing pfmemalloc propagating. Signed-off-by: Yunsheng Lin CC: Pavel Begunkov Reviewed-by: Pavel Begunkov Link: https://lore.kernel.org/r/20230515050107.46397-1-linyunsheng@huawei.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 00e8c435fa1a..4b8d55247198 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2426,11 +2426,6 @@ static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, { skb_frag_t *frag = &shinfo->frags[i]; - /* - * Propagate page pfmemalloc to the skb if we can. The problem is - * that not all callers have unique ownership of the page but rely - * on page_is_pfmemalloc doing the right thing(tm). - */ skb_frag_fill_page_desc(frag, page, off, size); } @@ -2463,6 +2458,11 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) { __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + + /* Propagate page pfmemalloc to the skb if we can. The problem is + * that not all callers have unique ownership of the page but rely + * on page_is_pfmemalloc doing the right thing(tm). + */ page = compound_head(page); if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; -- cgit v1.2.3 From 31b2ebc0929e964f4edfbfa7129d43f7e3c17165 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:12 +0530 Subject: fs/buffer.c: Add generic_buffers_fsync*() implementation Some of the higher layers like iomap takes inode_lock() when calling generic_write_sync(). Also writeback already happens from other paths without inode lock, so it's difficult to say that we really need sync_mapping_buffers() to take any inode locking here. Having said that, let's add generic_buffers_fsync/_noflush() implementation in buffer.c with no inode_lock/unlock() for now so that filesystems like ext2 and ext4's nojournal mode can use it. Ext4 when got converted to iomap for direct-io already copied it's own variant of __generic_file_fsync() without lock. This patch adds generic_buffers_fsync() & generic_buffers_fsync_noflush() implementations for use in filesystems like ext2 & ext4 respectively. Later we can review other filesystems as well to see if we can make generic_buffers_fsync/_noflush() which does not take any inode_lock() as the default path. 
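For illustration only, here is a minimal sketch of how a simple buffer_head based
filesystem could wire its ->fsync() method to the new helper. "simplefs" and its
file_operations are made-up names; ext2's conversion is expected to look essentially
like this.

// SPDX-License-Identifier: GPL-2.0
#include <linux/buffer_head.h>
#include <linux/fs.h>

static int simplefs_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	/*
	 * Writes back and waits on data pages and associated buffers, syncs
	 * inode metadata when needed, then issues a device cache flush.
	 */
	return generic_buffers_fsync(file, start, end, datasync);
}

const struct file_operations simplefs_file_operations = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= generic_file_write_iter,
	.mmap		= generic_file_mmap,
	.fsync		= simplefs_fsync,
};
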
Tested-by: Disha Goel Reviewed-by: Christoph Hellwig Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: --- fs/buffer.c | 70 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/buffer_head.h | 4 +++ 2 files changed, 74 insertions(+) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index a7fc561758b1..00cad2658a07 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -592,6 +592,76 @@ int sync_mapping_buffers(struct address_space *mapping) } EXPORT_SYMBOL(sync_mapping_buffers); +/** + * generic_buffers_fsync_noflush - generic buffer fsync implementation + * for simple filesystems with no inode lock + * + * @file: file to synchronize + * @start: start offset in bytes + * @end: end offset in bytes (inclusive) + * @datasync: only synchronize essential metadata if true + * + * This is a generic implementation of the fsync method for simple + * filesystems which track all non-inode metadata in the buffers list + * hanging off the address_space structure. + */ +int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, + bool datasync) +{ + struct inode *inode = file->f_mapping->host; + int err; + int ret; + + err = file_write_and_wait_range(file, start, end); + if (err) + return err; + + ret = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY_ALL)) + goto out; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto out; + + err = sync_inode_metadata(inode, 1); + if (ret == 0) + ret = err; + +out: + /* check and advance again to catch errors after syncing out buffers */ + err = file_check_and_advance_wb_err(file); + if (ret == 0) + ret = err; + return ret; +} +EXPORT_SYMBOL(generic_buffers_fsync_noflush); + +/** + * generic_buffers_fsync - generic buffer fsync implementation + * for simple filesystems with no inode lock + * + * @file: file to synchronize + * @start: start offset in bytes + * @end: end offset in bytes (inclusive) + * @datasync: only synchronize essential metadata if true + * + * This is a generic implementation of the fsync method for simple + * filesystems which track all non-inode metadata in the buffers list + * hanging off the address_space structure. This also makes sure that + * a device cache flush operation is called at the end. + */ +int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, + bool datasync) +{ + struct inode *inode = file->f_mapping->host; + int ret; + + ret = generic_buffers_fsync_noflush(file, start, end, datasync); + if (!ret) + ret = blkdev_issue_flush(inode->i_sb->s_bdev); + return ret; +} +EXPORT_SYMBOL(generic_buffers_fsync); + /* * Called when we've recently written block `bblock', and it is known that * `bblock' was for a buffer_boundary() buffer. 
This means that the block at diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1520793c72da..1bd73cefd311 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,6 +217,10 @@ int inode_has_buffers(struct inode *); void invalidate_inode_buffers(struct inode *); int remove_inode_buffers(struct inode *inode); int sync_mapping_buffers(struct address_space *mapping); +int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, + bool datasync); +int generic_buffers_fsync(struct file *file, loff_t start, loff_t end, + bool datasync); void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len); static inline void clean_bdev_bh_alias(struct buffer_head *bh) -- cgit v1.2.3 From 9a38cb27668e275ed912e67388cf11f454a24cc6 Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Thu, 11 May 2023 23:02:04 +0530 Subject: memory: tegra: Add interconnect support for DRAM scaling in Tegra234 Add Interconnect framework support to dynamically set the DRAM bandwidth from different clients. Both the MC and EMC drivers are added as ICC providers. The path for any request is: MC-Client[1-n] -> MC -> EMC -> EMEM/DRAM MC client's request for bandwidth will go to the MC driver which passes the client request info like BPMP Client ID, Client type and the Bandwidth to the BPMP-FW. The final DRAM freq to achieve the requested bandwidth is set by the BPMP-FW based on the passed parameters. Signed-off-by: Sumit Gupta Acked-by: Krzysztof Kozlowski Signed-off-by: Thierry Reding --- drivers/memory/tegra/mc.c | 5 ++ drivers/memory/tegra/tegra186-emc.c | 133 ++++++++++++++++++++++++++++++++++ drivers/memory/tegra/tegra234.c | 138 +++++++++++++++++++++++++++++++++++- include/linux/tegra-icc.h | 65 +++++++++++++++++ include/soc/tegra/mc.h | 7 ++ 5 files changed, 347 insertions(+), 1 deletion(-) create mode 100644 include/linux/tegra-icc.h (limited to 'include/linux') diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 9082b6c3763d..983455b1f98d 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -792,6 +793,8 @@ static int tegra_mc_interconnect_setup(struct tegra_mc *mc) mc->provider.data = &mc->provider; mc->provider.set = mc->soc->icc_ops->set; mc->provider.aggregate = mc->soc->icc_ops->aggregate; + mc->provider.get_bw = mc->soc->icc_ops->get_bw; + mc->provider.xlate = mc->soc->icc_ops->xlate; mc->provider.xlate_extended = mc->soc->icc_ops->xlate_extended; icc_provider_init(&mc->provider); @@ -824,6 +827,8 @@ static int tegra_mc_interconnect_setup(struct tegra_mc *mc) err = icc_link_create(node, TEGRA_ICC_MC); if (err) goto remove_nodes; + + node->data = (struct tegra_mc_client *)&(mc->soc->clients[i]); } err = icc_provider_register(&mc->provider); diff --git a/drivers/memory/tegra/tegra186-emc.c b/drivers/memory/tegra/tegra186-emc.c index e935ad4e95b6..6ad8a4023dd7 100644 --- a/drivers/memory/tegra/tegra186-emc.c +++ b/drivers/memory/tegra/tegra186-emc.c @@ -7,9 +7,11 @@ #include #include #include +#include #include #include +#include "mc.h" struct tegra186_emc_dvfs { unsigned long latency; @@ -29,8 +31,15 @@ struct tegra186_emc { unsigned long min_rate; unsigned long max_rate; } debugfs; + + struct icc_provider provider; }; +static inline struct tegra186_emc *to_tegra186_emc(struct icc_provider *provider) +{ + return container_of(provider, struct tegra186_emc, provider); +} + /* * debugfs interface * @@ -146,8 +155,102 @@ 
DEFINE_DEBUGFS_ATTRIBUTE(tegra186_emc_debug_max_rate_fops, tegra186_emc_debug_max_rate_get, tegra186_emc_debug_max_rate_set, "%llu\n"); +/* + * tegra_emc_icc_set_bw() - Set BW api for EMC provider + * @src: ICC node for External Memory Controller (EMC) + * @dst: ICC node for External Memory (DRAM) + * + * Do nothing here as info to BPMP-FW is now passed in the BW set function + * of the MC driver. BPMP-FW sets the final Freq based on the passed values. + */ +static int tegra_emc_icc_set_bw(struct icc_node *src, struct icc_node *dst) +{ + return 0; +} + +static struct icc_node * +tegra_emc_of_icc_xlate(struct of_phandle_args *spec, void *data) +{ + struct icc_provider *provider = data; + struct icc_node *node; + + /* External Memory is the only possible ICC route */ + list_for_each_entry(node, &provider->nodes, node_list) { + if (node->id != TEGRA_ICC_EMEM) + continue; + + return node; + } + + return ERR_PTR(-EPROBE_DEFER); +} + +static int tegra_emc_icc_get_init_bw(struct icc_node *node, u32 *avg, u32 *peak) +{ + *avg = 0; + *peak = 0; + + return 0; +} + +static int tegra_emc_interconnect_init(struct tegra186_emc *emc) +{ + struct tegra_mc *mc = dev_get_drvdata(emc->dev->parent); + const struct tegra_mc_soc *soc = mc->soc; + struct icc_node *node; + int err; + + emc->provider.dev = emc->dev; + emc->provider.set = tegra_emc_icc_set_bw; + emc->provider.data = &emc->provider; + emc->provider.aggregate = soc->icc_ops->aggregate; + emc->provider.xlate = tegra_emc_of_icc_xlate; + emc->provider.get_bw = tegra_emc_icc_get_init_bw; + + icc_provider_init(&emc->provider); + + /* create External Memory Controller node */ + node = icc_node_create(TEGRA_ICC_EMC); + if (IS_ERR(node)) { + err = PTR_ERR(node); + goto err_msg; + } + + node->name = "External Memory Controller"; + icc_node_add(node, &emc->provider); + + /* link External Memory Controller to External Memory (DRAM) */ + err = icc_link_create(node, TEGRA_ICC_EMEM); + if (err) + goto remove_nodes; + + /* create External Memory node */ + node = icc_node_create(TEGRA_ICC_EMEM); + if (IS_ERR(node)) { + err = PTR_ERR(node); + goto remove_nodes; + } + + node->name = "External Memory (DRAM)"; + icc_node_add(node, &emc->provider); + + err = icc_provider_register(&emc->provider); + if (err) + goto remove_nodes; + + return 0; + +remove_nodes: + icc_nodes_remove(&emc->provider); +err_msg: + dev_err(emc->dev, "failed to initialize ICC: %d\n", err); + + return err; +} + static int tegra186_emc_probe(struct platform_device *pdev) { + struct tegra_mc *mc = dev_get_drvdata(pdev->dev.parent); struct mrq_emc_dvfs_latency_response response; struct tegra_bpmp_message msg; struct tegra186_emc *emc; @@ -236,6 +339,32 @@ static int tegra186_emc_probe(struct platform_device *pdev) debugfs_create_file("max_rate", S_IRUGO | S_IWUSR, emc->debugfs.root, emc, &tegra186_emc_debug_max_rate_fops); + if (mc && mc->soc->icc_ops) { + if (tegra_bpmp_mrq_is_supported(emc->bpmp, MRQ_BWMGR_INT)) { + mc->bwmgr_mrq_supported = true; + + /* + * MC driver probe can't get BPMP reference as it gets probed + * earlier than BPMP. So, save the BPMP ref got from the EMC + * DT node in the mc->bpmp and use it in MC's icc_set hook. + */ + mc->bpmp = emc->bpmp; + barrier(); + } + + /* + * Initialize the ICC even if BPMP-FW doesn't support 'MRQ_BWMGR_INT'. + * Use the flag 'mc->bwmgr_mrq_supported' within MC driver and return + * EINVAL instead of passing the request to BPMP-FW later when the BW + * request is made by client with 'icc_set_bw()' call. 
+ */ + err = tegra_emc_interconnect_init(emc); + if (err) { + mc->bpmp = NULL; + goto put_bpmp; + } + } + return 0; put_bpmp: @@ -245,9 +374,12 @@ put_bpmp: static int tegra186_emc_remove(struct platform_device *pdev) { + struct tegra_mc *mc = dev_get_drvdata(pdev->dev.parent); struct tegra186_emc *emc = platform_get_drvdata(pdev); debugfs_remove_recursive(emc->debugfs.root); + + mc->bpmp = NULL; tegra_bpmp_put(emc->bpmp); return 0; @@ -272,6 +404,7 @@ static struct platform_driver tegra186_emc_driver = { .name = "tegra186-emc", .of_match_table = tegra186_emc_of_match, .suppress_bind_attrs = true, + .sync_state = icc_sync_state, }, .probe = tegra186_emc_probe, .remove = tegra186_emc_remove, diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c index 02dcc5748bba..56d911926d54 100644 --- a/drivers/memory/tegra/tegra234.c +++ b/drivers/memory/tegra/tegra234.c @@ -1,18 +1,23 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2021-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (C) 2022-2023, NVIDIA CORPORATION. All rights reserved. */ #include #include +#include +#include +#include #include "mc.h" static const struct tegra_mc_client tegra234_mc_clients[] = { { .id = TEGRA234_MEMORY_CLIENT_MGBEARD, .name = "mgbeard", + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_MGBE, .regs = { .sid = { @@ -23,6 +28,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_MGBEBRD, .name = "mgbebrd", + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_MGBE_VF1, .regs = { .sid = { @@ -33,6 +40,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_MGBECRD, .name = "mgbecrd", + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_MGBE_VF2, .regs = { .sid = { @@ -43,6 +52,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_MGBEDRD, .name = "mgbedrd", + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_MGBE_VF3, .regs = { .sid = { @@ -52,6 +63,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, }, { .id = TEGRA234_MEMORY_CLIENT_MGBEAWR, + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .name = "mgbeawr", .sid = TEGRA234_SID_MGBE, .regs = { @@ -63,6 +76,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_MGBEBWR, .name = "mgbebwr", + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_MGBE_VF1, .regs = { .sid = { @@ -73,6 +88,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_MGBECWR, .name = "mgbecwr", + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_MGBE_VF2, .regs = { .sid = { @@ -83,6 +100,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_SDMMCRAB, .name = "sdmmcrab", + .bpmp_id = TEGRA_ICC_BPMP_SDMMC_4, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_SDMMC4, .regs = { .sid = { @@ -93,6 +112,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_MGBEDWR, .name = "mgbedwr", + .bpmp_id = TEGRA_ICC_BPMP_EQOS, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_MGBE_VF3, .regs = { .sid = { @@ -103,6 +124,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_SDMMCWAB, .name = "sdmmcwab", + .bpmp_id = 
TEGRA_ICC_BPMP_SDMMC_4, + .type = TEGRA_ICC_NISO, .sid = TEGRA234_SID_SDMMC4, .regs = { .sid = { @@ -153,6 +176,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_APEDMAR, .name = "apedmar", + .bpmp_id = TEGRA_ICC_BPMP_APEDMA, + .type = TEGRA_ICC_ISO_AUDIO, .sid = TEGRA234_SID_APE, .regs = { .sid = { @@ -163,6 +188,8 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, { .id = TEGRA234_MEMORY_CLIENT_APEDMAW, .name = "apedmaw", + .bpmp_id = TEGRA_ICC_BPMP_APEDMA, + .type = TEGRA_ICC_ISO_AUDIO, .sid = TEGRA234_SID_APE, .regs = { .sid = { @@ -333,6 +360,114 @@ static const struct tegra_mc_client tegra234_mc_clients[] = { }, }; +/* + * tegra234_mc_icc_set() - Pass MC client info to the BPMP-FW + * @src: ICC node for Memory Controller's (MC) Client + * @dst: ICC node for Memory Controller (MC) + * + * Passing the current request info from the MC to the BPMP-FW where + * LA and PTSA registers are accessed and the final EMC freq is set + * based on client_id, type, latency and bandwidth. + * icc_set_bw() makes set_bw calls for both MC and EMC providers in + * sequence. Both the calls are protected by 'mutex_lock(&icc_lock)'. + * So, the data passed won't be updated by concurrent set calls from + * other clients. + */ +static int tegra234_mc_icc_set(struct icc_node *src, struct icc_node *dst) +{ + struct tegra_mc *mc = icc_provider_to_tegra_mc(dst->provider); + struct mrq_bwmgr_int_request bwmgr_req = { 0 }; + struct mrq_bwmgr_int_response bwmgr_resp = { 0 }; + const struct tegra_mc_client *pclient = src->data; + struct tegra_bpmp_message msg; + int ret; + + /* + * Same Src and Dst node will happen during boot from icc_node_add(). + * This can be used to pre-initialize and set bandwidth for all clients + * before their drivers are loaded. We are skipping this case as for us, + * the pre-initialization already happened in Bootloader(MB2) and BPMP-FW. + */ + if (src->id == dst->id) + return 0; + + if (!mc->bwmgr_mrq_supported) + return -EINVAL; + + if (!mc->bpmp) { + dev_err(mc->dev, "BPMP reference NULL\n"); + return -ENOENT; + } + + if (pclient->type == TEGRA_ICC_NISO) + bwmgr_req.bwmgr_calc_set_req.niso_bw = src->avg_bw; + else + bwmgr_req.bwmgr_calc_set_req.iso_bw = src->avg_bw; + + bwmgr_req.bwmgr_calc_set_req.client_id = pclient->bpmp_id; + + bwmgr_req.cmd = CMD_BWMGR_INT_CALC_AND_SET; + bwmgr_req.bwmgr_calc_set_req.mc_floor = src->peak_bw; + bwmgr_req.bwmgr_calc_set_req.floor_unit = BWMGR_INT_UNIT_KBPS; + + memset(&msg, 0, sizeof(msg)); + msg.mrq = MRQ_BWMGR_INT; + msg.tx.data = &bwmgr_req; + msg.tx.size = sizeof(bwmgr_req); + msg.rx.data = &bwmgr_resp; + msg.rx.size = sizeof(bwmgr_resp); + + ret = tegra_bpmp_transfer(mc->bpmp, &msg); + if (ret < 0) { + dev_err(mc->dev, "BPMP transfer failed: %d\n", ret); + goto error; + } + if (msg.rx.ret < 0) { + pr_err("failed to set bandwidth for %u: %d\n", + bwmgr_req.bwmgr_calc_set_req.client_id, msg.rx.ret); + ret = -EINVAL; + } + +error: + return ret; +} + +static struct icc_node* +tegra234_mc_of_icc_xlate(struct of_phandle_args *spec, void *data) +{ + struct tegra_mc *mc = icc_provider_to_tegra_mc(data); + unsigned int cl_id = spec->args[0]; + struct icc_node *node; + + list_for_each_entry(node, &mc->provider.nodes, node_list) { + if (node->id != cl_id) + continue; + + return node; + } + + /* + * If a client driver calls devm_of_icc_get() before the MC driver + * is probed, then return EPROBE_DEFER to the client driver. 
+ */ + return ERR_PTR(-EPROBE_DEFER); +} + +static int tegra234_mc_icc_get_init_bw(struct icc_node *node, u32 *avg, u32 *peak) +{ + *avg = 0; + *peak = 0; + + return 0; +} + +static const struct tegra_mc_icc_ops tegra234_mc_icc_ops = { + .xlate = tegra234_mc_of_icc_xlate, + .aggregate = icc_std_aggregate, + .get_bw = tegra234_mc_icc_get_init_bw, + .set = tegra234_mc_icc_set, +}; + const struct tegra_mc_soc tegra234_mc_soc = { .num_clients = ARRAY_SIZE(tegra234_mc_clients), .clients = tegra234_mc_clients, @@ -345,6 +480,7 @@ const struct tegra_mc_soc tegra234_mc_soc = { MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM, .has_addr_hi_reg = true, .ops = &tegra186_mc_ops, + .icc_ops = &tegra234_mc_icc_ops, .ch_intmask = 0x0000ff00, .global_intstatus_channel_shift = 8, /* diff --git a/include/linux/tegra-icc.h b/include/linux/tegra-icc.h new file mode 100644 index 000000000000..4b4d4bee290c --- /dev/null +++ b/include/linux/tegra-icc.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022-2023 NVIDIA CORPORATION. All rights reserved. + */ + +#ifndef LINUX_TEGRA_ICC_H +#define LINUX_TEGRA_ICC_H + +enum tegra_icc_client_type { + TEGRA_ICC_NONE, + TEGRA_ICC_NISO, + TEGRA_ICC_ISO_DISPLAY, + TEGRA_ICC_ISO_VI, + TEGRA_ICC_ISO_AUDIO, + TEGRA_ICC_ISO_VIFAL, +}; + +/* ICC ID's for MC client's used in BPMP */ +#define TEGRA_ICC_BPMP_DEBUG 1 +#define TEGRA_ICC_BPMP_CPU_CLUSTER0 2 +#define TEGRA_ICC_BPMP_CPU_CLUSTER1 3 +#define TEGRA_ICC_BPMP_CPU_CLUSTER2 4 +#define TEGRA_ICC_BPMP_GPU 5 +#define TEGRA_ICC_BPMP_CACTMON 6 +#define TEGRA_ICC_BPMP_DISPLAY 7 +#define TEGRA_ICC_BPMP_VI 8 +#define TEGRA_ICC_BPMP_EQOS 9 +#define TEGRA_ICC_BPMP_PCIE_0 10 +#define TEGRA_ICC_BPMP_PCIE_1 11 +#define TEGRA_ICC_BPMP_PCIE_2 12 +#define TEGRA_ICC_BPMP_PCIE_3 13 +#define TEGRA_ICC_BPMP_PCIE_4 14 +#define TEGRA_ICC_BPMP_PCIE_5 15 +#define TEGRA_ICC_BPMP_PCIE_6 16 +#define TEGRA_ICC_BPMP_PCIE_7 17 +#define TEGRA_ICC_BPMP_PCIE_8 18 +#define TEGRA_ICC_BPMP_PCIE_9 19 +#define TEGRA_ICC_BPMP_PCIE_10 20 +#define TEGRA_ICC_BPMP_DLA_0 21 +#define TEGRA_ICC_BPMP_DLA_1 22 +#define TEGRA_ICC_BPMP_SDMMC_1 23 +#define TEGRA_ICC_BPMP_SDMMC_2 24 +#define TEGRA_ICC_BPMP_SDMMC_3 25 +#define TEGRA_ICC_BPMP_SDMMC_4 26 +#define TEGRA_ICC_BPMP_NVDEC 27 +#define TEGRA_ICC_BPMP_NVENC 28 +#define TEGRA_ICC_BPMP_NVJPG_0 29 +#define TEGRA_ICC_BPMP_NVJPG_1 30 +#define TEGRA_ICC_BPMP_OFAA 31 +#define TEGRA_ICC_BPMP_XUSB_HOST 32 +#define TEGRA_ICC_BPMP_XUSB_DEV 33 +#define TEGRA_ICC_BPMP_TSEC 34 +#define TEGRA_ICC_BPMP_VIC 35 +#define TEGRA_ICC_BPMP_APE 36 +#define TEGRA_ICC_BPMP_APEDMA 37 +#define TEGRA_ICC_BPMP_SE 38 +#define TEGRA_ICC_BPMP_ISP 39 +#define TEGRA_ICC_BPMP_HDA 40 +#define TEGRA_ICC_BPMP_VIFAL 41 +#define TEGRA_ICC_BPMP_VI2FAL 42 +#define TEGRA_ICC_BPMP_VI2 43 +#define TEGRA_ICC_BPMP_RCE 44 +#define TEGRA_ICC_BPMP_PVA 45 + +#endif /* LINUX_TEGRA_ICC_H */ diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index 51a2263e1bc5..900d88b26fae 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -13,6 +13,7 @@ #include #include #include +#include struct clk; struct device; @@ -26,6 +27,8 @@ struct tegra_mc_timing { struct tegra_mc_client { unsigned int id; + unsigned int bpmp_id; + enum tegra_icc_client_type type; const char *name; /* * For Tegra210 and earlier, this is the SWGROUP ID used for IOVA translations in the @@ -166,8 +169,10 @@ struct tegra_mc_icc_ops { int (*set)(struct icc_node *src, struct icc_node *dst); int (*aggregate)(struct icc_node *node, u32 tag, u32 avg_bw, u32 
peak_bw, u32 *agg_avg, u32 *agg_peak); + struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data); struct icc_node_data *(*xlate_extended)(struct of_phandle_args *spec, void *data); + int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak); }; struct tegra_mc_ops { @@ -214,6 +219,7 @@ struct tegra_mc_soc { }; struct tegra_mc { + struct tegra_bpmp *bpmp; struct device *dev; struct tegra_smmu *smmu; struct gart_device *gart; @@ -229,6 +235,7 @@ struct tegra_mc { struct tegra_mc_timing *timings; unsigned int num_timings; + bool bwmgr_mrq_supported; struct reset_controller_dev reset; struct icc_provider provider; -- cgit v1.2.3 From 514ca14ed5444b911de59ed3381dfd195d99fe4b Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 17 Apr 2023 15:00:05 -0700 Subject: start_kernel: Add __no_stack_protector function attribute Back during the discussion of commit a9a3ed1eff36 ("x86: Fix early boot crash on gcc-10, third try") we discussed the need for a function attribute to control the omission of stack protectors on a per-function basis; at the time Clang had support for no_stack_protector but GCC did not. This was fixed in gcc-11. Now that the function attribute is available, let's start using it. Callers of boot_init_stack_canary need to use this function attribute unless they're compiled with -fno-stack-protector, otherwise the canary stored in the stack slot of the caller will differ upon the call to boot_init_stack_canary. This will lead to a call to __stack_chk_fail() then panic. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94722 Link: https://lore.kernel.org/all/20200316130414.GC12561@hirez.programming.kicks-ass.net/ Tested-by: Nathan Chancellor Acked-by: Michael Ellerman (powerpc) Acked-by: Miguel Ojeda Acked-by: Peter Zijlstra (Intel) Signed-off-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230412-no_stackp-v2-1-116f9fe4bbe7@google.com Signed-off-by: Josh Poimboeuf Signed-off-by: ndesaulniers@google.com --- arch/powerpc/kernel/smp.c | 1 + include/linux/compiler_attributes.h | 12 ++++++++++++ init/main.c | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 265801a3e94c..6903a7222273 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1605,6 +1605,7 @@ static void add_cpu_to_masks(int cpu) } /* Activate a secondary processor. */ +__no_stack_protector void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index e659cb6fded3..84864767a56a 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -255,6 +255,18 @@ */ #define __noreturn __attribute__((__noreturn__)) +/* + * Optional: only supported since GCC >= 11.1, clang >= 7.0. + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fstack_005fprotector-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#no-stack-protector-safebuffers + */ +#if __has_attribute(__no_stack_protector__) +# define __no_stack_protector __attribute__((__no_stack_protector__)) +#else +# define __no_stack_protector +#endif + /* * Optional: not supported by gcc. 
* diff --git a/init/main.c b/init/main.c index af50044deed5..c445c1fb19b9 100644 --- a/init/main.c +++ b/init/main.c @@ -877,7 +877,8 @@ static void __init print_unknown_bootoptions(void) memblock_free(unknown_options, len); } -asmlinkage __visible void __init __no_sanitize_address __noreturn start_kernel(void) +asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector +void start_kernel(void) { char *command_line; char *after_dashes; -- cgit v1.2.3 From 03d89a2de25bbc5c77e61a0cf77663978c4b6ea7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Nov 2021 17:20:54 -0600 Subject: io_uring: support for user allocated memory for rings/sqes Currently io_uring applications must call mmap(2) twice to map the rings themselves, and the sqes array. This works fine, but it does not support using huge pages to back the rings/sqes. Provide a way for the application to pass in pre-allocated memory for the rings/sqes, which can then suitably be allocated from shmfs or via mmap to get huge page support. Particularly for larger rings, this reduces the TLBs needed. If an application wishes to take advantage of that, it must pre-allocate the memory needed for the sq/cq ring, and the sqes. The former must be passed in via the io_uring_params->cq_off.user_data field, while the latter is passed in via the io_uring_params->sq_off.user_data field. Then it must set IORING_SETUP_NO_MMAP in the io_uring_params->flags field, and io_uring will then map the existing memory into the kernel for shared use. The application must not call mmap(2) to map rings as it otherwise would have, that will now fail with -EINVAL if this setup flag was used. The pages used for the rings and sqes must be contigious. The intent here is clearly that huge pages should be used, otherwise the normal setup procedure works fine as-is. The application may use one huge page for both the rings and sqes. Outside of those initialization changes, everything works like it did before. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 10 ++++ include/uapi/linux/io_uring.h | 9 +++- io_uring/io_uring.c | 106 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 114 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 1b2a20a42413..f04ce513fadb 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -211,6 +211,16 @@ struct io_ring_ctx { unsigned int compat: 1; enum task_work_notify_mode notify_method; + + /* + * If IORING_SETUP_NO_MMAP is used, then the below holds + * the gup'ed pages for the two rings, and the sqes. 
+ */ + unsigned short n_ring_pages; + unsigned short n_sqe_pages; + struct page **ring_pages; + struct page **sqe_pages; + struct io_rings *rings; struct task_struct *submitter_task; struct percpu_ref refs; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0716cb17e436..2edba9a274de 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -173,6 +173,11 @@ enum { */ #define IORING_SETUP_DEFER_TASKRUN (1U << 13) +/* + * Application provides the memory for the rings + */ +#define IORING_SETUP_NO_MMAP (1U << 14) + enum io_uring_op { IORING_OP_NOP, IORING_OP_READV, @@ -406,7 +411,7 @@ struct io_sqring_offsets { __u32 dropped; __u32 array; __u32 resv1; - __u64 resv2; + __u64 user_addr; }; /* @@ -425,7 +430,7 @@ struct io_cqring_offsets { __u32 cqes; __u32 flags; __u32 resv1; - __u64 resv2; + __u64 user_addr; }; /* diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 74433939a318..61379cf8e7f5 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2688,12 +2688,85 @@ static void io_mem_free(void *ptr) free_compound_page(page); } +static void io_pages_free(struct page ***pages, int npages) +{ + struct page **page_array; + int i; + + if (!pages) + return; + page_array = *pages; + for (i = 0; i < npages; i++) + unpin_user_page(page_array[i]); + kvfree(page_array); + *pages = NULL; +} + +static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, + unsigned long uaddr, size_t size) +{ + struct page **page_array; + unsigned int nr_pages; + int ret; + + *npages = 0; + + if (uaddr & (PAGE_SIZE - 1) || !size) + return ERR_PTR(-EINVAL); + + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (nr_pages > USHRT_MAX) + return ERR_PTR(-EINVAL); + page_array = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); + if (!page_array) + return ERR_PTR(-ENOMEM); + + ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, + page_array); + if (ret != nr_pages) { +err: + io_pages_free(&page_array, ret > 0 ? ret : 0); + return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT); + } + /* + * Should be a single page. If the ring is small enough that we can + * use a normal page, that is fine. If we need multiple pages, then + * userspace should use a huge page. That's the only way to guarantee + * that we get contigious memory, outside of just being lucky or + * (currently) having low memory fragmentation. 
+ */ + if (page_array[0] != page_array[ret - 1]) + goto err; + *pages = page_array; + *npages = nr_pages; + return page_to_virt(page_array[0]); +} + +static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr, + size_t size) +{ + return __io_uaddr_map(&ctx->ring_pages, &ctx->n_ring_pages, uaddr, + size); +} + +static void *io_sqes_map(struct io_ring_ctx *ctx, unsigned long uaddr, + size_t size) +{ + return __io_uaddr_map(&ctx->sqe_pages, &ctx->n_sqe_pages, uaddr, + size); +} + static void io_rings_free(struct io_ring_ctx *ctx) { - io_mem_free(ctx->rings); - io_mem_free(ctx->sq_sqes); - ctx->rings = NULL; - ctx->sq_sqes = NULL; + if (!(ctx->flags & IORING_SETUP_NO_MMAP)) { + io_mem_free(ctx->rings); + io_mem_free(ctx->sq_sqes); + ctx->rings = NULL; + ctx->sq_sqes = NULL; + } else { + io_pages_free(&ctx->ring_pages, ctx->n_ring_pages); + io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages); + } } static void *io_mem_alloc(size_t size) @@ -3338,6 +3411,10 @@ static void *io_uring_validate_mmap_request(struct file *file, struct page *page; void *ptr; + /* Don't allow mmap if the ring was setup without it */ + if (ctx->flags & IORING_SETUP_NO_MMAP) + return ERR_PTR(-EINVAL); + switch (offset & IORING_OFF_MMAP_MASK) { case IORING_OFF_SQ_RING: case IORING_OFF_CQ_RING: @@ -3673,7 +3750,11 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, if (size == SIZE_MAX) return -EOVERFLOW; - rings = io_mem_alloc(size); + if (!(ctx->flags & IORING_SETUP_NO_MMAP)) + rings = io_mem_alloc(size); + else + rings = io_rings_map(ctx, p->cq_off.user_addr, size); + if (IS_ERR(rings)) return PTR_ERR(rings); @@ -3693,7 +3774,11 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, return -EOVERFLOW; } - ptr = io_mem_alloc(size); + if (!(ctx->flags & IORING_SETUP_NO_MMAP)) + ptr = io_mem_alloc(size); + else + ptr = io_sqes_map(ctx, p->sq_off.user_addr, size); + if (IS_ERR(ptr)) { io_rings_free(ctx); return PTR_ERR(ptr); @@ -3885,7 +3970,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; p->sq_off.resv1 = 0; - p->sq_off.resv2 = 0; + if (!(ctx->flags & IORING_SETUP_NO_MMAP)) + p->sq_off.user_addr = 0; p->cq_off.head = offsetof(struct io_rings, cq.head); p->cq_off.tail = offsetof(struct io_rings, cq.tail); @@ -3895,7 +3981,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, p->cq_off.cqes = offsetof(struct io_rings, cqes); p->cq_off.flags = offsetof(struct io_rings, cq_flags); p->cq_off.resv1 = 0; - p->cq_off.resv2 = 0; + if (!(ctx->flags & IORING_SETUP_NO_MMAP)) + p->cq_off.user_addr = 0; p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | @@ -3961,7 +4048,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL | IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG | IORING_SETUP_SQE128 | IORING_SETUP_CQE32 | - IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN)) + IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN | + IORING_SETUP_NO_MMAP)) return -EINVAL; return io_uring_create(entries, &p, params); -- cgit v1.2.3 From 42ae6f1695beed57958e7a2554e6d52dddc56e43 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 16 May 2023 14:55:34 +0200 Subject: dmaengine: ste_dma40: Remove platform data The Ux500 is device tree-only since ages. 
Delete the platform data header and push it into or next to the driver instead. Drop the non-DT probe path since this will not happen. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20230417-ux500-dma40-cleanup-v3-4-60bfa6785968@linaro.org Signed-off-by: Vinod Koul --- drivers/dma/ste_dma40.c | 56 +++++--- drivers/dma/ste_dma40.h | 110 +++++++++++++++ drivers/dma/ste_dma40_ll.c | 3 +- include/linux/platform_data/dma-ste-dma40.h | 209 ---------------------------- 4 files changed, 150 insertions(+), 228 deletions(-) create mode 100644 drivers/dma/ste_dma40.h delete mode 100644 include/linux/platform_data/dma-ste-dma40.h (limited to 'include/linux') diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 813de4efced5..48c9606cfd46 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -23,11 +23,39 @@ #include #include #include -#include #include "dmaengine.h" +#include "ste_dma40.h" #include "ste_dma40_ll.h" +/** + * struct stedma40_platform_data - Configuration struct for the dma device. + * + * @dev_tx: mapping between destination event line and io address + * @dev_rx: mapping between source event line and io address + * @disabled_channels: A vector, ending with -1, that marks physical channels + * that are for different reasons not available for the driver. + * @soft_lli_chans: A vector, that marks physical channels will use LLI by SW + * which avoids HW bug that exists in some versions of the controller. + * SoftLLI introduces relink overhead that could impact performace for + * certain use cases. + * @num_of_soft_lli_chans: The number of channels that needs to be configured + * to use SoftLLI. + * @use_esram_lcla: flag for mapping the lcla into esram region + * @num_of_memcpy_chans: The number of channels reserved for memcpy. + * @num_of_phy_chans: The number of physical channels implemented in HW. + * 0 means reading the number of channels from DMA HW but this is only valid + * for 'multiple of 4' channels, like 8. 
+ */ +struct stedma40_platform_data { + int disabled_channels[STEDMA40_MAX_PHYS]; + int *soft_lli_chans; + int num_of_soft_lli_chans; + bool use_esram_lcla; + int num_of_memcpy_chans; + int num_of_phy_chans; +}; + #define D40_NAME "dma40" #define D40_PHY_CHAN -1 @@ -2269,7 +2297,7 @@ d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src, return NULL; } -bool stedma40_filter(struct dma_chan *chan, void *data) +static bool stedma40_filter(struct dma_chan *chan, void *data) { struct stedma40_chan_cfg *info = data; struct d40_chan *d40c = @@ -2288,7 +2316,6 @@ bool stedma40_filter(struct dma_chan *chan, void *data) return err == 0; } -EXPORT_SYMBOL(stedma40_filter); static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src) { @@ -3517,16 +3544,9 @@ static int __init d40_probe(struct platform_device *pdev) int num_reserved_chans; u32 val; - if (!plat_data) { - if (np) { - if (d40_of_probe(pdev, np)) { - ret = -ENOMEM; - goto report_failure; - } - } else { - d40_err(dev, "No pdata or Device Tree provided\n"); - goto report_failure; - } + if (d40_of_probe(pdev, np)) { + ret = -ENOMEM; + goto report_failure; } base = d40_hw_detect_init(pdev); @@ -3650,11 +3670,11 @@ static int __init d40_probe(struct platform_device *pdev) d40_hw_init(base); - if (np) { - ret = of_dma_controller_register(np, d40_xlate, NULL); - if (ret) - dev_err(dev, - "could not register of_dma_controller\n"); + ret = of_dma_controller_register(np, d40_xlate, NULL); + if (ret) { + dev_err(dev, + "could not register of_dma_controller\n"); + goto destroy_cache; } dev_info(base->dev, "initialized\n"); diff --git a/drivers/dma/ste_dma40.h b/drivers/dma/ste_dma40.h new file mode 100644 index 000000000000..c697bfe16a01 --- /dev/null +++ b/drivers/dma/ste_dma40.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef STE_DMA40_H +#define STE_DMA40_H + +/* + * Maxium size for a single dma descriptor + * Size is limited to 16 bits. 
+ * Size is in the units of addr-widths (1,2,4,8 bytes) + * Larger transfers will be split up to multiple linked desc + */ +#define STEDMA40_MAX_SEG_SIZE 0xFFFF + +/* dev types for memcpy */ +#define STEDMA40_DEV_DST_MEMORY (-1) +#define STEDMA40_DEV_SRC_MEMORY (-1) + +enum stedma40_mode { + STEDMA40_MODE_LOGICAL = 0, + STEDMA40_MODE_PHYSICAL, + STEDMA40_MODE_OPERATION, +}; + +enum stedma40_mode_opt { + STEDMA40_PCHAN_BASIC_MODE = 0, + STEDMA40_LCHAN_SRC_LOG_DST_LOG = 0, + STEDMA40_PCHAN_MODULO_MODE, + STEDMA40_PCHAN_DOUBLE_DST_MODE, + STEDMA40_LCHAN_SRC_PHY_DST_LOG, + STEDMA40_LCHAN_SRC_LOG_DST_PHY, +}; + +#define STEDMA40_ESIZE_8_BIT 0x0 +#define STEDMA40_ESIZE_16_BIT 0x1 +#define STEDMA40_ESIZE_32_BIT 0x2 +#define STEDMA40_ESIZE_64_BIT 0x3 + +/* The value 4 indicates that PEN-reg shall be set to 0 */ +#define STEDMA40_PSIZE_PHY_1 0x4 +#define STEDMA40_PSIZE_PHY_2 0x0 +#define STEDMA40_PSIZE_PHY_4 0x1 +#define STEDMA40_PSIZE_PHY_8 0x2 +#define STEDMA40_PSIZE_PHY_16 0x3 + +/* + * The number of elements differ in logical and + * physical mode + */ +#define STEDMA40_PSIZE_LOG_1 STEDMA40_PSIZE_PHY_2 +#define STEDMA40_PSIZE_LOG_4 STEDMA40_PSIZE_PHY_4 +#define STEDMA40_PSIZE_LOG_8 STEDMA40_PSIZE_PHY_8 +#define STEDMA40_PSIZE_LOG_16 STEDMA40_PSIZE_PHY_16 + +/* Maximum number of possible physical channels */ +#define STEDMA40_MAX_PHYS 32 + +enum stedma40_flow_ctrl { + STEDMA40_NO_FLOW_CTRL, + STEDMA40_FLOW_CTRL, +}; + +/** + * struct stedma40_half_channel_info - dst/src channel configuration + * + * @big_endian: true if the src/dst should be read as big endian + * @data_width: Data width of the src/dst hardware + * @p_size: Burst size + * @flow_ctrl: Flow control on/off. + */ +struct stedma40_half_channel_info { + bool big_endian; + enum dma_slave_buswidth data_width; + int psize; + enum stedma40_flow_ctrl flow_ctrl; +}; + +/** + * struct stedma40_chan_cfg - Structure to be filled by client drivers. + * + * @dir: MEM 2 MEM, PERIPH 2 MEM , MEM 2 PERIPH, PERIPH 2 PERIPH + * @high_priority: true if high-priority + * @realtime: true if realtime mode is to be enabled. Only available on DMA40 + * version 3+, i.e DB8500v2+ + * @mode: channel mode: physical, logical, or operation + * @mode_opt: options for the chosen channel mode + * @dev_type: src/dst device type (driver uses dir to figure out which) + * @src_info: Parameters for dst half channel + * @dst_info: Parameters for dst half channel + * @use_fixed_channel: if true, use physical channel specified by phy_channel + * @phy_channel: physical channel to use, only if use_fixed_channel is true + * + * This structure has to be filled by the client drivers. + * It is recommended to do all dma configurations for clients in the machine. 
+ * + */ +struct stedma40_chan_cfg { + enum dma_transfer_direction dir; + bool high_priority; + bool realtime; + enum stedma40_mode mode; + enum stedma40_mode_opt mode_opt; + int dev_type; + struct stedma40_half_channel_info src_info; + struct stedma40_half_channel_info dst_info; + + bool use_fixed_channel; + int phy_channel; +}; + +#endif /* STE_DMA40_H */ diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c index b5287c661eb7..4c489b126cb2 100644 --- a/drivers/dma/ste_dma40_ll.c +++ b/drivers/dma/ste_dma40_ll.c @@ -6,8 +6,9 @@ */ #include -#include +#include +#include "ste_dma40.h" #include "ste_dma40_ll.h" static u8 d40_width_to_bits(enum dma_slave_buswidth width) diff --git a/include/linux/platform_data/dma-ste-dma40.h b/include/linux/platform_data/dma-ste-dma40.h deleted file mode 100644 index 10641633facc..000000000000 --- a/include/linux/platform_data/dma-ste-dma40.h +++ /dev/null @@ -1,209 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2007-2010 - * Author: Per Forlin for ST-Ericsson - * Author: Jonas Aaberg for ST-Ericsson - */ - - -#ifndef STE_DMA40_H -#define STE_DMA40_H - -#include -#include -#include -#include - -/* - * Maxium size for a single dma descriptor - * Size is limited to 16 bits. - * Size is in the units of addr-widths (1,2,4,8 bytes) - * Larger transfers will be split up to multiple linked desc - */ -#define STEDMA40_MAX_SEG_SIZE 0xFFFF - -/* dev types for memcpy */ -#define STEDMA40_DEV_DST_MEMORY (-1) -#define STEDMA40_DEV_SRC_MEMORY (-1) - -enum stedma40_mode { - STEDMA40_MODE_LOGICAL = 0, - STEDMA40_MODE_PHYSICAL, - STEDMA40_MODE_OPERATION, -}; - -enum stedma40_mode_opt { - STEDMA40_PCHAN_BASIC_MODE = 0, - STEDMA40_LCHAN_SRC_LOG_DST_LOG = 0, - STEDMA40_PCHAN_MODULO_MODE, - STEDMA40_PCHAN_DOUBLE_DST_MODE, - STEDMA40_LCHAN_SRC_PHY_DST_LOG, - STEDMA40_LCHAN_SRC_LOG_DST_PHY, -}; - -#define STEDMA40_ESIZE_8_BIT 0x0 -#define STEDMA40_ESIZE_16_BIT 0x1 -#define STEDMA40_ESIZE_32_BIT 0x2 -#define STEDMA40_ESIZE_64_BIT 0x3 - -/* The value 4 indicates that PEN-reg shall be set to 0 */ -#define STEDMA40_PSIZE_PHY_1 0x4 -#define STEDMA40_PSIZE_PHY_2 0x0 -#define STEDMA40_PSIZE_PHY_4 0x1 -#define STEDMA40_PSIZE_PHY_8 0x2 -#define STEDMA40_PSIZE_PHY_16 0x3 - -/* - * The number of elements differ in logical and - * physical mode - */ -#define STEDMA40_PSIZE_LOG_1 STEDMA40_PSIZE_PHY_2 -#define STEDMA40_PSIZE_LOG_4 STEDMA40_PSIZE_PHY_4 -#define STEDMA40_PSIZE_LOG_8 STEDMA40_PSIZE_PHY_8 -#define STEDMA40_PSIZE_LOG_16 STEDMA40_PSIZE_PHY_16 - -/* Maximum number of possible physical channels */ -#define STEDMA40_MAX_PHYS 32 - -enum stedma40_flow_ctrl { - STEDMA40_NO_FLOW_CTRL, - STEDMA40_FLOW_CTRL, -}; - -/** - * struct stedma40_half_channel_info - dst/src channel configuration - * - * @big_endian: true if the src/dst should be read as big endian - * @data_width: Data width of the src/dst hardware - * @p_size: Burst size - * @flow_ctrl: Flow control on/off. - */ -struct stedma40_half_channel_info { - bool big_endian; - enum dma_slave_buswidth data_width; - int psize; - enum stedma40_flow_ctrl flow_ctrl; -}; - -/** - * struct stedma40_chan_cfg - Structure to be filled by client drivers. - * - * @dir: MEM 2 MEM, PERIPH 2 MEM , MEM 2 PERIPH, PERIPH 2 PERIPH - * @high_priority: true if high-priority - * @realtime: true if realtime mode is to be enabled. 
Only available on DMA40 - * version 3+, i.e DB8500v2+ - * @mode: channel mode: physical, logical, or operation - * @mode_opt: options for the chosen channel mode - * @dev_type: src/dst device type (driver uses dir to figure out which) - * @src_info: Parameters for dst half channel - * @dst_info: Parameters for dst half channel - * @use_fixed_channel: if true, use physical channel specified by phy_channel - * @phy_channel: physical channel to use, only if use_fixed_channel is true - * - * This structure has to be filled by the client drivers. - * It is recommended to do all dma configurations for clients in the machine. - * - */ -struct stedma40_chan_cfg { - enum dma_transfer_direction dir; - bool high_priority; - bool realtime; - enum stedma40_mode mode; - enum stedma40_mode_opt mode_opt; - int dev_type; - struct stedma40_half_channel_info src_info; - struct stedma40_half_channel_info dst_info; - - bool use_fixed_channel; - int phy_channel; -}; - -/** - * struct stedma40_platform_data - Configuration struct for the dma device. - * - * @dev_tx: mapping between destination event line and io address - * @dev_rx: mapping between source event line and io address - * @disabled_channels: A vector, ending with -1, that marks physical channels - * that are for different reasons not available for the driver. - * @soft_lli_chans: A vector, that marks physical channels will use LLI by SW - * which avoids HW bug that exists in some versions of the controller. - * SoftLLI introduces relink overhead that could impact performace for - * certain use cases. - * @num_of_soft_lli_chans: The number of channels that needs to be configured - * to use SoftLLI. - * @use_esram_lcla: flag for mapping the lcla into esram region - * @num_of_memcpy_chans: The number of channels reserved for memcpy. - * @num_of_phy_chans: The number of physical channels implemented in HW. - * 0 means reading the number of channels from DMA HW but this is only valid - * for 'multiple of 4' channels, like 8. - */ -struct stedma40_platform_data { - int disabled_channels[STEDMA40_MAX_PHYS]; - int *soft_lli_chans; - int num_of_soft_lli_chans; - bool use_esram_lcla; - int num_of_memcpy_chans; - int num_of_phy_chans; -}; - -#ifdef CONFIG_STE_DMA40 - -/** - * stedma40_filter() - Provides stedma40_chan_cfg to the - * ste_dma40 dma driver via the dmaengine framework. - * does some checking of what's provided. - * - * Never directly called by client. It used by dmaengine. - * @chan: dmaengine handle. - * @data: Must be of type: struct stedma40_chan_cfg and is - * the configuration of the framework. - * - * - */ - -bool stedma40_filter(struct dma_chan *chan, void *data); - -/** - * stedma40_slave_mem() - Transfers a raw data buffer to or from a slave - * (=device) - * - * @chan: dmaengine handle - * @addr: source or destination physicall address. - * @size: bytes to transfer - * @direction: direction of transfer - * @flags: is actually enum dma_ctrl_flags. 
See dmaengine.h - */ - -static inline struct -dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan, - dma_addr_t addr, - unsigned int size, - enum dma_transfer_direction direction, - unsigned long flags) -{ - struct scatterlist sg; - sg_init_table(&sg, 1); - sg.dma_address = addr; - sg.length = size; - - return dmaengine_prep_slave_sg(chan, &sg, 1, direction, flags); -} - -#else -static inline bool stedma40_filter(struct dma_chan *chan, void *data) -{ - return false; -} - -static inline struct -dma_async_tx_descriptor *stedma40_slave_mem(struct dma_chan *chan, - dma_addr_t addr, - unsigned int size, - enum dma_transfer_direction direction, - unsigned long flags) -{ - return NULL; -} -#endif - -#endif -- cgit v1.2.3 From 26f457142d7ee2da20a5b701862230e4961423d9 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:22 +0000 Subject: KVM: arm64: Export kvm_are_all_memslots_empty() Export kvm_are_all_memslots_empty(). This will be used by a future commit when checking before setting a capability. Signed-off-by: Ricardo Koller Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-5-ricarkol@google.com Signed-off-by: Oliver Upton --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0e571e973bc2..7651069ada46 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -991,6 +991,8 @@ static inline bool kvm_memslots_empty(struct kvm_memslots *slots) return RB_EMPTY_ROOT(&slots->gfn_tree); } +bool kvm_are_all_memslots_empty(struct kvm *kvm); + #define kvm_for_each_memslot(memslot, bkt, slots) \ hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \ if (WARN_ON_ONCE(!memslot->npages)) { \ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cb5c13eee193..13aed654111a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4598,7 +4598,7 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, return -EINVAL; } -static bool kvm_are_all_memslots_empty(struct kvm *kvm) +bool kvm_are_all_memslots_empty(struct kvm *kvm) { int i; @@ -4611,6 +4611,7 @@ static bool kvm_are_all_memslots_empty(struct kvm *kvm) return true; } +EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty); static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, struct kvm_enable_cap *cap) -- cgit v1.2.3 From ead62aa370a81c4fb42a44c4edeafe13e0a3a703 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sat, 13 May 2023 11:18:40 +0200 Subject: fortify: strscpy: Fix flipped q and p docstring typo Fix typo in the strscpy() docstring where q and p were flipped. Signed-off-by: Arne Welzel Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index c9de1f59ee80..e29df83bff8a 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -299,8 +299,8 @@ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); * @q: Where to copy the string from * @size: Size of destination buffer * - * Copy the source string @p, or as much of it as fits, into the destination - * @q buffer. The behavior is undefined if the string buffers overlap. The + * Copy the source string @q, or as much of it as fits, into the destination + * @p buffer. 
The behavior is undefined if the string buffers overlap. The * destination @p buffer is always NUL terminated, unless it's zero-sized. * * Preferred to strlcpy() since the API doesn't require reading memory -- cgit v1.2.3 From 21a2c74b0a2a784228c9e3af63cff96d0dea7b8a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 7 Apr 2023 12:27:10 -0700 Subject: fortify: Use const variables for __member_size tracking The sizes reported by __member_size should never change in a given function. Mark them as such. Suggested-by: Miguel Ojeda Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20230407192717.636137-4-keescook@chromium.org --- include/linux/fortify-string.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index e29df83bff8a..2e7a47b62ba2 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -20,7 +20,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning(" ({ \ char *__p = (char *)(p); \ size_t __ret = SIZE_MAX; \ - size_t __p_size = __member_size(p); \ + const size_t __p_size = __member_size(p); \ if (__p_size != SIZE_MAX && \ __builtin_constant_p(*__p)) { \ size_t __p_len = __p_size - 1; \ @@ -142,7 +142,7 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_strncpy, 1, 2, 3) char *strncpy(char * const POS p, const char *q, __kernel_size_t size) { - size_t p_size = __member_size(p); + const size_t p_size = __member_size(p); if (__compiletime_lessthan(p_size, size)) __write_overflow(); @@ -169,7 +169,7 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { - size_t p_size = __member_size(p); + const size_t p_size = __member_size(p); if (p_size == SIZE_MAX) return __underlying_strcat(p, q); @@ -191,8 +191,8 @@ extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(st */ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { - size_t p_size = __member_size(p); - size_t p_len = __compiletime_strlen(p); + const size_t p_size = __member_size(p); + const size_t p_len = __compiletime_strlen(p); size_t ret; /* We can take compile-time actions when maxlen is const. */ @@ -233,8 +233,8 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size __FORTIFY_INLINE __diagnose_as(__builtin_strlen, 1) __kernel_size_t __fortify_strlen(const char * const POS p) { + const size_t p_size = __member_size(p); __kernel_size_t ret; - size_t p_size = __member_size(p); /* Give up if we don't know how large p is. */ if (p_size == SIZE_MAX) @@ -267,8 +267,8 @@ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); */ __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) { - size_t p_size = __member_size(p); - size_t q_size = __member_size(q); + const size_t p_size = __member_size(p); + const size_t q_size = __member_size(q); size_t q_len; /* Full count of source string length. */ size_t len; /* Count of characters going into destination. 
*/ @@ -318,10 +318,10 @@ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); */ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) { - size_t len; /* Use string size rather than possible enclosing struct size. */ - size_t p_size = __member_size(p); - size_t q_size = __member_size(q); + const size_t p_size = __member_size(p); + const size_t q_size = __member_size(q); + size_t len; /* If we cannot get size of p and q default to call strscpy. */ if (p_size == SIZE_MAX && q_size == SIZE_MAX) @@ -394,9 +394,9 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { + const size_t p_size = __member_size(p); + const size_t q_size = __member_size(q); size_t p_len, copy_len; - size_t p_size = __member_size(p); - size_t q_size = __member_size(q); if (p_size == SIZE_MAX && q_size == SIZE_MAX) return __underlying_strncat(p, q, count); @@ -639,7 +639,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __struct_size(p); + const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); @@ -651,8 +651,8 @@ __FORTIFY_INLINE void *memscan(void * const POS0 p, int c, __kernel_size_t size) __FORTIFY_INLINE __diagnose_as(__builtin_memcmp, 1, 2, 3) int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t size) { - size_t p_size = __struct_size(p); - size_t q_size = __struct_size(q); + const size_t p_size = __struct_size(p); + const size_t q_size = __struct_size(q); if (__builtin_constant_p(size)) { if (__compiletime_lessthan(p_size, size)) @@ -668,7 +668,7 @@ int memcmp(const void * const POS0 p, const void * const POS0 q, __kernel_size_t __FORTIFY_INLINE __diagnose_as(__builtin_memchr, 1, 2, 3) void *memchr(const void * const POS0 p, int c, __kernel_size_t size) { - size_t p_size = __struct_size(p); + const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); @@ -680,7 +680,7 @@ void *memchr(const void * const POS0 p, int c, __kernel_size_t size) void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) { - size_t p_size = __struct_size(p); + const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); @@ -693,7 +693,7 @@ extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kme __realloc_size(2); __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { - size_t p_size = __struct_size(p); + const size_t p_size = __struct_size(p); if (__compiletime_lessthan(p_size, size)) __read_overflow(); @@ -720,8 +720,8 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) { - size_t p_size = __member_size(p); - size_t q_size = __member_size(q); + const size_t p_size = __member_size(p); + const size_t q_size = __member_size(q); size_t size; /* If neither buffer size is known, immediately give up. 
*/ -- cgit v1.2.3 From 605395cd7ceded5842c8ba6763ea24feee690c87 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 2 Apr 2023 23:00:05 -0700 Subject: fortify: Add protection for strlcat() The definition of strcat() was defined in terms of unfortified strlcat(), but that meant there was no bounds checking done on the internal strlen() calls, and the (bounded) copy would be performed before reporting a failure. Additionally, pathological cases (i.e. unterminated destination buffer) did not make calls to fortify_panic(), which will make future unit testing more difficult. Instead, explicitly define a fortified strlcat() wrapper for strcat() to use. Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 64 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 2e7a47b62ba2..756c89bb88e0 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -371,6 +371,70 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s return __real_strscpy(p, q, len); } +/* Defined after fortified strlen() to reuse it. */ +extern size_t __real_strlcat(char *p, const char *q, size_t avail) __RENAME(strlcat); +/** + * strlcat - Append a string to an existing string + * + * @p: pointer to %NUL-terminated string to append to + * @q: pointer to %NUL-terminated string to append from + * @avail: Maximum bytes available in @p + * + * Appends %NUL-terminated string @q after the %NUL-terminated + * string at @p, but will not write beyond @avail bytes total, + * potentially truncating the copy from @q. @p will stay + * %NUL-terminated only if a %NUL already existed within + * the @avail bytes of @p. If so, the resulting number of + * bytes copied from @q will be at most "@avail - strlen(@p) - 1". + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * read and write overflows, this is only possible when the sizes + * of @p and @q are known to the compiler. Prefer building the + * string with formatting, via scnprintf(), seq_buf, or similar. + * + * Returns total bytes that _would_ have been contained by @p + * regardless of truncation, similar to snprintf(). If return + * value is >= @avail, the string has been truncated. + * + */ +__FORTIFY_INLINE +size_t strlcat(char * const POS p, const char * const POS q, size_t avail) +{ + const size_t p_size = __member_size(p); + const size_t q_size = __member_size(q); + size_t p_len, copy_len; + size_t actual, wanted; + + /* Give up immediately if both buffer sizes are unknown. */ + if (p_size == SIZE_MAX && q_size == SIZE_MAX) + return __real_strlcat(p, q, avail); + + p_len = strnlen(p, avail); + copy_len = strlen(q); + wanted = actual = p_len + copy_len; + + /* Cannot append any more: report truncation. */ + if (avail <= p_len) + return wanted; + + /* Give up if string is already overflowed. */ + if (p_size <= p_len) + fortify_panic(__func__); + + if (actual >= avail) { + copy_len = avail - p_len - 1; + actual = p_len + copy_len; + } + + /* Give up if copy will overflow. 
*/ + if (p_size <= actual) + fortify_panic(__func__); + __underlying_memcpy(p + p_len, q, copy_len); + p[actual] = '\0'; + + return wanted; +} + /** * strncat - Append a string to an existing string * -- cgit v1.2.3 From 55c84a5cf2c72a821719823ef2ebef01b119025b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 4 Apr 2023 14:24:27 -0700 Subject: fortify: strcat: Move definition to use fortified strlcat() Move the definition of fortified strcat() to after strlcat() to use it for bounds checking. Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 53 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 756c89bb88e0..da51a83b2829 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -151,33 +151,6 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) return __underlying_strncpy(p, q, size); } -/** - * strcat - Append a string to an existing string - * - * @p: pointer to NUL-terminated string to append to - * @q: pointer to NUL-terminated source string to append from - * - * Do not use this function. While FORTIFY_SOURCE tries to avoid - * read and write overflows, this is only possible when the - * destination buffer size is known to the compiler. Prefer - * building the string with formatting, via scnprintf() or similar. - * At the very least, use strncat(). - * - * Returns @p. - * - */ -__FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) -char *strcat(char * const POS p, const char *q) -{ - const size_t p_size = __member_size(p); - - if (p_size == SIZE_MAX) - return __underlying_strcat(p, q); - if (strlcat(p, q, p_size) >= p_size) - fortify_panic(__func__); - return p; -} - extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); /** * strnlen - Return bounded count of characters in a NUL-terminated string @@ -435,6 +408,32 @@ size_t strlcat(char * const POS p, const char * const POS q, size_t avail) return wanted; } +/* Defined after fortified strlcat() to reuse it. */ +/** + * strcat - Append a string to an existing string + * + * @p: pointer to NUL-terminated string to append to + * @q: pointer to NUL-terminated source string to append from + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * read and write overflows, this is only possible when the + * destination buffer size is known to the compiler. Prefer + * building the string with formatting, via scnprintf() or similar. + * At the very least, use strncat(). + * + * Returns @p. 
+ * + */ +__FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) +char *strcat(char * const POS p, const char *q) +{ + const size_t p_size = __member_size(p); + + if (strlcat(p, q, p_size) >= p_size) + fortify_panic(__func__); + return p; +} + /** * strncat - Append a string to an existing string * -- cgit v1.2.3 From 247c8d2f9837a3e29e3b6b7a4aa9c36c37659dd4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 21:56:12 +0200 Subject: fs: pipe: reveal missing function prototypes A couple of functions from fs/pipe.c are used both internally and for the watch queue code, but the declaration is only visible when the latter is enabled: fs/pipe.c:1254:5: error: no previous prototype for 'pipe_resize_ring' fs/pipe.c:758:15: error: no previous prototype for 'account_pipe_buffers' fs/pipe.c:764:6: error: no previous prototype for 'too_many_pipe_buffers_soft' fs/pipe.c:771:6: error: no previous prototype for 'too_many_pipe_buffers_hard' fs/pipe.c:777:6: error: no previous prototype for 'pipe_is_unprivileged_user' Make them visible unconditionally to avoid these warnings. Fixes: c73be61cede5 ("pipe: Add general notification queue support") Signed-off-by: Arnd Bergmann Message-Id: <20230516195629.551602-1-arnd@kernel.org> Signed-off-by: Christian Brauner --- include/linux/pipe_fs_i.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index d2c3f16cf6b1..02e0086b10f6 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -261,18 +261,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; -#ifdef CONFIG_WATCH_QUEUE unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new); bool too_many_pipe_buffers_soft(unsigned long user_bufs); bool too_many_pipe_buffers_hard(unsigned long user_bufs); bool pipe_is_unprivileged_user(void); -#endif /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ -#ifdef CONFIG_WATCH_QUEUE int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); -#endif long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); -- cgit v1.2.3 From ef104443bffa004f631729dfc924f0b84abbd602 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 21:57:29 +0200 Subject: procfs: consolidate arch_report_meminfo declaration The arch_report_meminfo() function is provided by four architectures, with a __weak fallback in procfs itself. On architectures that don't have a custom version, the __weak version causes a warning because of the missing prototype. Remove the architecture specific prototypes and instead add one in linux/proc_fs.h.
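The warning pattern being fixed here is generic to __weak fallbacks: the weak default and every strong architecture override need to see one shared declaration, otherwise -Wmissing-prototypes fires on whichever definition lacks it. A minimal stand-alone sketch of that arrangement, with illustrative file names rather than the kernel's actual headers:

/* report.h -- the single shared prototype that both sides include */
#ifndef REPORT_H
#define REPORT_H
struct seq_file;
void arch_report_meminfo(struct seq_file *m);
#endif

/* generic.c -- the __weak default; with the prototype above in scope,
 * -Wmissing-prototypes no longer warns about this definition.
 */
#include "report.h"

void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
{
        /* nothing to report by default */
}

/* arch.c -- an architecture that wants custom output includes the same
 * report.h and provides a strong (non-weak) definition; the linker picks
 * it over the weak default.
 */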
Signed-off-by: Arnd Bergmann Acked-by: Dave Hansen # for arch/x86 Acked-by: Helge Deller # parisc Reviewed-by: Alexander Gordeev Message-Id: <20230516195834.551901-1-arnd@kernel.org> Signed-off-by: Christian Brauner --- arch/parisc/include/asm/pgtable.h | 3 --- arch/powerpc/include/asm/pgtable.h | 3 --- arch/s390/include/asm/pgtable.h | 3 --- arch/s390/mm/pageattr.c | 1 + arch/x86/include/asm/pgtable.h | 1 + arch/x86/include/asm/pgtable_types.h | 3 --- arch/x86/mm/pat/set_memory.c | 1 + include/linux/proc_fs.h | 2 ++ 8 files changed, 5 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index e715df5385d6..5656395c95ee 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -472,9 +472,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, #define pte_same(A,B) (pte_val(A) == pte_val(B)) -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 9972626ddaf6..6a88bfdaa69b 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -165,9 +165,6 @@ static inline bool is_ioremap_addr(const void *x) return addr >= IOREMAP_BASE && addr < IOREMAP_END; } - -struct seq_file; -void arch_report_meminfo(struct seq_file *m); #endif /* CONFIG_PPC64 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6822a11c2c8a..c55f3c3365af 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -42,9 +42,6 @@ static inline void update_page_count(int level, long count) atomic_long_add(count, &direct_pages_count[level]); } -struct seq_file; -void arch_report_meminfo(struct seq_file *m); - /* * The S390 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. 
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 5ba3bd8a7b12..ca5a418c58a8 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -4,6 +4,7 @@ * Author(s): Jan Glauber */ #include +#include #include #include #include diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 15ae4d6ba476..5700bb337987 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -27,6 +27,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); +struct seq_file; void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, bool user); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 447d4bee25c4..ba3e2554799a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -513,9 +513,6 @@ extern void native_pagetable_init(void); #define native_pagetable_init paging_init #endif -struct seq_file; -extern void arch_report_meminfo(struct seq_file *m); - enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 7159cf787613..d1515756e369 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 0260f5ea98fe..253f2676d93a 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -158,6 +158,8 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); #endif /* CONFIG_PROC_PID_ARCH_STATUS */ +void arch_report_meminfo(struct seq_file *m); + #else /* CONFIG_PROC_FS */ static inline void proc_root_init(void) -- cgit v1.2.3 From f15afbd34d8fadbd375f1212e97837e32bc170cc Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Mon, 24 Apr 2023 13:18:35 +0800 Subject: fs: fix undefined behavior in bit shift for SB_NOUSER Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. It was spotted by UBSAN. So let's just fix this by using the BIT() helper for all SB_* flags. Fixes: e462ec50cb5f ("VFS: Differentiate mount flags (MS_*) from internal superblock flags") Signed-off-by: Hao Ge Message-Id: <20230424051835.374204-1-gehao@kylinos.cn> [brauner@kernel.org: use BIT() for all SB_* flags] Signed-off-by: Christian Brauner --- include/linux/fs.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 21a981680856..133f0640fb24 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1076,29 +1076,29 @@ extern int send_sigurg(struct fown_struct *fown); * sb->s_flags. Note that these mirror the equivalent MS_* flags where * represented in both. */ -#define SB_RDONLY 1 /* Mount read-only */ -#define SB_NOSUID 2 /* Ignore suid and sgid bits */ -#define SB_NODEV 4 /* Disallow access to device special files */ -#define SB_NOEXEC 8 /* Disallow program execution */ -#define SB_SYNCHRONOUS 16 /* Writes are synced at once */ -#define SB_MANDLOCK 64 /* Allow mandatory locks on an FS */ -#define SB_DIRSYNC 128 /* Directory modifications are synchronous */ -#define SB_NOATIME 1024 /* Do not update access times. 
*/ -#define SB_NODIRATIME 2048 /* Do not update directory access times */ -#define SB_SILENT 32768 -#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */ -#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */ -#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */ -#define SB_I_VERSION (1<<23) /* Update inode I_version field */ -#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ +#define SB_RDONLY BIT(0) /* Mount read-only */ +#define SB_NOSUID BIT(1) /* Ignore suid and sgid bits */ +#define SB_NODEV BIT(2) /* Disallow access to device special files */ +#define SB_NOEXEC BIT(3) /* Disallow program execution */ +#define SB_SYNCHRONOUS BIT(4) /* Writes are synced at once */ +#define SB_MANDLOCK BIT(6) /* Allow mandatory locks on an FS */ +#define SB_DIRSYNC BIT(7) /* Directory modifications are synchronous */ +#define SB_NOATIME BIT(10) /* Do not update access times. */ +#define SB_NODIRATIME BIT(11) /* Do not update directory access times */ +#define SB_SILENT BIT(15) +#define SB_POSIXACL BIT(16) /* VFS does not apply the umask */ +#define SB_INLINECRYPT BIT(17) /* Use blk-crypto for encrypted files */ +#define SB_KERNMOUNT BIT(22) /* this is a kern_mount call */ +#define SB_I_VERSION BIT(23) /* Update inode I_version field */ +#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ -#define SB_SUBMOUNT (1<<26) -#define SB_FORCE (1<<27) -#define SB_NOSEC (1<<28) -#define SB_BORN (1<<29) -#define SB_ACTIVE (1<<30) -#define SB_NOUSER (1<<31) +#define SB_SUBMOUNT BIT(26) +#define SB_FORCE BIT(27) +#define SB_NOSEC BIT(28) +#define SB_BORN BIT(29) +#define SB_ACTIVE BIT(30) +#define SB_NOUSER BIT(31) /* These flags relate to encoding and casefolding */ #define SB_ENC_STRICT_MODE_FL (1 << 0) -- cgit v1.2.3 From 67d1b0a1030fb20d54b720df6e976c06b893fb00 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 14 Apr 2023 10:25:39 +0530 Subject: soc: ti: pruss: Add pruss_get()/put() API Add two new get and put API, pruss_get() and pruss_put() to the PRUSS platform driver to allow client drivers to request a handle to a PRUSS device. This handle will be used by client drivers to request various operations of the PRUSS platform driver through additional API that will be added in the following patches. The pruss_get() function returns the pruss handle corresponding to a PRUSS device referenced by a PRU remoteproc instance. The pruss_put() is the complimentary function to pruss_get(). Co-developed-by: Suman Anna Signed-off-by: Suman Anna Signed-off-by: Tero Kristo Co-developed-by: Grzegorz Jaszczyk Signed-off-by: Grzegorz Jaszczyk Signed-off-by: Puranjay Mohan Reviewed-by: Roger Quadros Reviewed-by: Tony Lindgren Reviewed-by: Simon Horman Acked-by: Mathieu Poirier Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20230414045542.3249939-2-danishanwar@ti.com Signed-off-by: Nishanth Menon --- drivers/soc/ti/pruss.c | 62 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pruss_driver.h | 18 +++++++++++++ 2 files changed, 80 insertions(+) (limited to 'include/linux') diff --git a/drivers/soc/ti/pruss.c b/drivers/soc/ti/pruss.c index e68441bd7b30..f836660dac64 100644 --- a/drivers/soc/ti/pruss.c +++ b/drivers/soc/ti/pruss.c @@ -6,6 +6,7 @@ * Author(s): * Suman Anna * Andrew F. 
Davis + * Tero Kristo */ #include @@ -18,6 +19,7 @@ #include #include #include +#include #include /** @@ -30,6 +32,66 @@ struct pruss_private_data { bool has_core_mux_clock; }; +/** + * pruss_get() - get the pruss for a given PRU remoteproc + * @rproc: remoteproc handle of a PRU instance + * + * Finds the parent pruss device for a PRU given the @rproc handle of the + * PRU remote processor. This function increments the pruss device's refcount, + * so always use pruss_put() to decrement it back once pruss isn't needed + * anymore. + * + * This API doesn't check if @rproc is valid or not. It is expected the caller + * will have done a pru_rproc_get() on @rproc, before calling this API to make + * sure that @rproc is valid. + * + * Return: pruss handle on success, and an ERR_PTR on failure using one + * of the following error values + * -EINVAL if invalid parameter + * -ENODEV if PRU device or PRUSS device is not found + */ +struct pruss *pruss_get(struct rproc *rproc) +{ + struct pruss *pruss; + struct device *dev; + struct platform_device *ppdev; + + if (IS_ERR_OR_NULL(rproc)) + return ERR_PTR(-EINVAL); + + dev = &rproc->dev; + + /* make sure it is PRU rproc */ + if (!dev->parent || !is_pru_rproc(dev->parent)) + return ERR_PTR(-ENODEV); + + ppdev = to_platform_device(dev->parent->parent); + pruss = platform_get_drvdata(ppdev); + if (!pruss) + return ERR_PTR(-ENODEV); + + get_device(pruss->dev); + + return pruss; +} +EXPORT_SYMBOL_GPL(pruss_get); + +/** + * pruss_put() - decrement pruss device's usecount + * @pruss: pruss handle + * + * Complimentary function for pruss_get(). Needs to be called + * after the PRUSS is used, and only if the pruss_get() succeeds. + */ +void pruss_put(struct pruss *pruss) +{ + if (IS_ERR_OR_NULL(pruss)) + return; + + put_device(pruss->dev); +} +EXPORT_SYMBOL_GPL(pruss_put); + static void pruss_of_free_clk_provider(void *data) { struct device_node *clk_mux_np = data; diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h index ecfded30ed05..cb40c2b31045 100644 --- a/include/linux/pruss_driver.h +++ b/include/linux/pruss_driver.h @@ -9,7 +9,9 @@ #ifndef _PRUSS_DRIVER_H_ #define _PRUSS_DRIVER_H_ +#include #include +#include /* * enum pruss_mem - PRUSS memory range identifiers @@ -51,4 +53,20 @@ struct pruss { struct clk *iep_clk_mux; }; +#if IS_ENABLED(CONFIG_TI_PRUSS) + +struct pruss *pruss_get(struct rproc *rproc); +void pruss_put(struct pruss *pruss); + +#else + +static inline struct pruss *pruss_get(struct rproc *rproc) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void pruss_put(struct pruss *pruss) { } + +#endif /* CONFIG_TI_PRUSS */ + #endif /* _PRUSS_DRIVER_H_ */ -- cgit v1.2.3 From b789ca1e3380ab63b60c3356c026a7e8eb26ba01 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Fri, 14 Apr 2023 10:25:40 +0530 Subject: soc: ti: pruss: Add pruss_{request,release}_mem_region() API Add two new API - pruss_request_mem_region() & pruss_release_mem_region(), to the PRUSS platform driver to allow client drivers to acquire and release the common memory resources present within a PRU-ICSS subsystem. This allows the client drivers to directly manipulate the respective memories, as per their design contract with the associated firmware. Co-developed-by: Suman Anna Signed-off-by: Suman Anna Signed-off-by: Andrew F. 
Davis Co-developed-by: Grzegorz Jaszczyk Signed-off-by: Grzegorz Jaszczyk Reviewed-by: Roger Quadros Acked-by: Mathieu Poirier Reviewed-by: Tony Lindgren Reviewed-by: Simon Horman Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20230414045542.3249939-3-danishanwar@ti.com Signed-off-by: Nishanth Menon --- drivers/soc/ti/pruss.c | 77 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pruss_driver.h | 22 +++++++++++++ 2 files changed, 99 insertions(+) (limited to 'include/linux') diff --git a/drivers/soc/ti/pruss.c b/drivers/soc/ti/pruss.c index f836660dac64..df5eb4d959f8 100644 --- a/drivers/soc/ti/pruss.c +++ b/drivers/soc/ti/pruss.c @@ -92,6 +92,82 @@ void pruss_put(struct pruss *pruss) } EXPORT_SYMBOL_GPL(pruss_put); +/** + * pruss_request_mem_region() - request a memory resource + * @pruss: the pruss instance + * @mem_id: the memory resource id + * @region: pointer to memory region structure to be filled in + * + * This function allows a client driver to request a memory resource, + * and if successful, will let the client driver own the particular + * memory region until released using the pruss_release_mem_region() + * API. + * + * Return: 0 if requested memory region is available (in such case pointer to + * memory region is returned via @region), an error otherwise + */ +int pruss_request_mem_region(struct pruss *pruss, enum pruss_mem mem_id, + struct pruss_mem_region *region) +{ + if (!pruss || !region || mem_id >= PRUSS_MEM_MAX) + return -EINVAL; + + mutex_lock(&pruss->lock); + + if (pruss->mem_in_use[mem_id]) { + mutex_unlock(&pruss->lock); + return -EBUSY; + } + + *region = pruss->mem_regions[mem_id]; + pruss->mem_in_use[mem_id] = region; + + mutex_unlock(&pruss->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(pruss_request_mem_region); + +/** + * pruss_release_mem_region() - release a memory resource + * @pruss: the pruss instance + * @region: the memory region to release + * + * This function is the complimentary function to + * pruss_request_mem_region(), and allows the client drivers to + * release back a memory resource. 
+ * + * Return: 0 on success, an error code otherwise + */ +int pruss_release_mem_region(struct pruss *pruss, + struct pruss_mem_region *region) +{ + int id; + + if (!pruss || !region) + return -EINVAL; + + mutex_lock(&pruss->lock); + + /* find out the memory region being released */ + for (id = 0; id < PRUSS_MEM_MAX; id++) { + if (pruss->mem_in_use[id] == region) + break; + } + + if (id == PRUSS_MEM_MAX) { + mutex_unlock(&pruss->lock); + return -EINVAL; + } + + pruss->mem_in_use[id] = NULL; + + mutex_unlock(&pruss->lock); + + return 0; +} +EXPORT_SYMBOL_GPL(pruss_release_mem_region); + static void pruss_of_free_clk_provider(void *data) { struct device_node *clk_mux_np = data; @@ -298,6 +374,7 @@ static int pruss_probe(struct platform_device *pdev) return -ENOMEM; pruss->dev = dev; + mutex_init(&pruss->lock); child = of_get_child_by_name(np, "memories"); if (!child) { diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h index cb40c2b31045..c8f2e53b911b 100644 --- a/include/linux/pruss_driver.h +++ b/include/linux/pruss_driver.h @@ -9,6 +9,7 @@ #ifndef _PRUSS_DRIVER_H_ #define _PRUSS_DRIVER_H_ +#include #include #include #include @@ -41,6 +42,8 @@ struct pruss_mem_region { * @cfg_base: base iomap for CFG region * @cfg_regmap: regmap for config region * @mem_regions: data for each of the PRUSS memory regions + * @mem_in_use: to indicate if memory resource is in use + * @lock: mutex to serialize access to resources * @core_clk_mux: clk handle for PRUSS CORE_CLK_MUX * @iep_clk_mux: clk handle for PRUSS IEP_CLK_MUX */ @@ -49,6 +52,8 @@ struct pruss { void __iomem *cfg_base; struct regmap *cfg_regmap; struct pruss_mem_region mem_regions[PRUSS_MEM_MAX]; + struct pruss_mem_region *mem_in_use[PRUSS_MEM_MAX]; + struct mutex lock; /* PRU resource lock */ struct clk *core_clk_mux; struct clk *iep_clk_mux; }; @@ -57,6 +62,10 @@ struct pruss { struct pruss *pruss_get(struct rproc *rproc); void pruss_put(struct pruss *pruss); +int pruss_request_mem_region(struct pruss *pruss, enum pruss_mem mem_id, + struct pruss_mem_region *region); +int pruss_release_mem_region(struct pruss *pruss, + struct pruss_mem_region *region); #else @@ -67,6 +76,19 @@ static inline struct pruss *pruss_get(struct rproc *rproc) static inline void pruss_put(struct pruss *pruss) { } +static inline int pruss_request_mem_region(struct pruss *pruss, + enum pruss_mem mem_id, + struct pruss_mem_region *region) +{ + return -EOPNOTSUPP; +} + +static inline int pruss_release_mem_region(struct pruss *pruss, + struct pruss_mem_region *region) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_TI_PRUSS */ #endif /* _PRUSS_DRIVER_H_ */ -- cgit v1.2.3 From 51b5760e56ef19106a3c4487a66d186d46ccc6f4 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 Apr 2023 10:25:41 +0530 Subject: soc: ti: pruss: Add pruss_cfg_read()/update(), pruss_cfg_get_gpmux()/set_gpmux() APIs Add two new generic API pruss_cfg_read() and pruss_cfg_update() to the PRUSS platform driver to read and program respectively a register within the PRUSS CFG sub-module represented by a syscon driver. These APIs are internal to PRUSS driver. Add two new helper functions pruss_cfg_get_gpmux() & pruss_cfg_set_gpmux() to get and set the GP MUX mode for programming the PRUSS internal wrapper mux functionality as needed by usecases. Various useful registers and macros for certain register bit-fields and their values have also been added. 
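An illustrative client-side use of the new mux helpers, not part of this patch: it assumes the caller already holds a pruss handle from pruss_get(), and that PRUSS_PRU0 is the PRU identifier provided by the PRU remoteproc header.

#include <linux/pruss_driver.h>

/* Hypothetical client fragment: remember PRU0's current GP MUX setting,
 * switch to the MII2 mux while the firmware needs it, then restore it.
 */
static int example_switch_gpmux(struct pruss *pruss, u8 *saved_mux)
{
        int ret;

        ret = pruss_cfg_get_gpmux(pruss, PRUSS_PRU0, saved_mux);
        if (ret)
                return ret;

        return pruss_cfg_set_gpmux(pruss, PRUSS_PRU0, PRUSS_GP_MUX_SEL_MII2);
}

static void example_restore_gpmux(struct pruss *pruss, u8 saved_mux)
{
        pruss_cfg_set_gpmux(pruss, PRUSS_PRU0, saved_mux);
}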
Signed-off-by: Suman Anna Co-developed-by: Grzegorz Jaszczyk Signed-off-by: Grzegorz Jaszczyk Signed-off-by: Puranjay Mohan Reviewed-by: Roger Quadros Reviewed-by: Tony Lindgren Reviewed-by: Simon Horman Acked-by: Mathieu Poirier Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20230414045542.3249939-4-danishanwar@ti.com Signed-off-by: Nishanth Menon --- drivers/soc/ti/pruss.c | 45 ++++++++++++++++++++++ drivers/soc/ti/pruss.h | 88 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pruss_driver.h | 32 ++++++++++++++++ 3 files changed, 165 insertions(+) create mode 100644 drivers/soc/ti/pruss.h (limited to 'include/linux') diff --git a/drivers/soc/ti/pruss.c b/drivers/soc/ti/pruss.c index df5eb4d959f8..2c44913c083b 100644 --- a/drivers/soc/ti/pruss.c +++ b/drivers/soc/ti/pruss.c @@ -21,6 +21,7 @@ #include #include #include +#include "pruss.h" /** * struct pruss_private_data - PRUSS driver private data @@ -168,6 +169,50 @@ int pruss_release_mem_region(struct pruss *pruss, } EXPORT_SYMBOL_GPL(pruss_release_mem_region); +/** + * pruss_cfg_get_gpmux() - get the current GPMUX value for a PRU device + * @pruss: pruss instance + * @pru_id: PRU identifier (0-1) + * @mux: pointer to store the current mux value into + * + * Return: 0 on success, or an error code otherwise + */ +int pruss_cfg_get_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 *mux) +{ + int ret; + u32 val; + + if (pru_id >= PRUSS_NUM_PRUS || !mux) + return -EINVAL; + + ret = pruss_cfg_read(pruss, PRUSS_CFG_GPCFG(pru_id), &val); + if (!ret) + *mux = (u8)((val & PRUSS_GPCFG_PRU_MUX_SEL_MASK) >> + PRUSS_GPCFG_PRU_MUX_SEL_SHIFT); + return ret; +} +EXPORT_SYMBOL_GPL(pruss_cfg_get_gpmux); + +/** + * pruss_cfg_set_gpmux() - set the GPMUX value for a PRU device + * @pruss: pruss instance + * @pru_id: PRU identifier (0-1) + * @mux: new mux value for PRU + * + * Return: 0 on success, or an error code otherwise + */ +int pruss_cfg_set_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 mux) +{ + if (mux >= PRUSS_GP_MUX_SEL_MAX || + pru_id >= PRUSS_NUM_PRUS) + return -EINVAL; + + return pruss_cfg_update(pruss, PRUSS_CFG_GPCFG(pru_id), + PRUSS_GPCFG_PRU_MUX_SEL_MASK, + (u32)mux << PRUSS_GPCFG_PRU_MUX_SEL_SHIFT); +} +EXPORT_SYMBOL_GPL(pruss_cfg_set_gpmux); + static void pruss_of_free_clk_provider(void *data) { struct device_node *clk_mux_np = data; diff --git a/drivers/soc/ti/pruss.h b/drivers/soc/ti/pruss.h new file mode 100644 index 000000000000..6c55987e0e55 --- /dev/null +++ b/drivers/soc/ti/pruss.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PRU-ICSS Subsystem user interfaces + * + * Copyright (C) 2015-2023 Texas Instruments Incorporated - http://www.ti.com + * MD Danish Anwar + */ + +#ifndef _SOC_TI_PRUSS_H_ +#define _SOC_TI_PRUSS_H_ + +#include +#include + +/* + * PRU_ICSS_CFG registers + * SYSCFG, ISRP, ISP, IESP, IECP, SCRP applicable on AMxxxx devices only + */ +#define PRUSS_CFG_REVID 0x00 +#define PRUSS_CFG_SYSCFG 0x04 +#define PRUSS_CFG_GPCFG(x) (0x08 + (x) * 4) +#define PRUSS_CFG_CGR 0x10 +#define PRUSS_CFG_ISRP 0x14 +#define PRUSS_CFG_ISP 0x18 +#define PRUSS_CFG_IESP 0x1C +#define PRUSS_CFG_IECP 0x20 +#define PRUSS_CFG_SCRP 0x24 +#define PRUSS_CFG_PMAO 0x28 +#define PRUSS_CFG_MII_RT 0x2C +#define PRUSS_CFG_IEPCLK 0x30 +#define PRUSS_CFG_SPP 0x34 +#define PRUSS_CFG_PIN_MX 0x40 + +/* PRUSS_GPCFG register bits */ +#define PRUSS_GPCFG_PRU_GPI_MODE_MASK GENMASK(1, 0) +#define PRUSS_GPCFG_PRU_GPI_MODE_SHIFT 0 + +#define PRUSS_GPCFG_PRU_MUX_SEL_SHIFT 26 +#define 
PRUSS_GPCFG_PRU_MUX_SEL_MASK GENMASK(29, 26) + +/* PRUSS_MII_RT register bits */ +#define PRUSS_MII_RT_EVENT_EN BIT(0) + +/* PRUSS_SPP register bits */ +#define PRUSS_SPP_XFER_SHIFT_EN BIT(1) +#define PRUSS_SPP_PRU1_PAD_HP_EN BIT(0) +#define PRUSS_SPP_RTU_XFR_SHIFT_EN BIT(3) + +/** + * pruss_cfg_read() - read a PRUSS CFG sub-module register + * @pruss: the pruss instance handle + * @reg: register offset within the CFG sub-module + * @val: pointer to return the value in + * + * Reads a given register within the PRUSS CFG sub-module and + * returns it through the passed-in @val pointer + * + * Return: 0 on success, or an error code otherwise + */ +static int pruss_cfg_read(struct pruss *pruss, unsigned int reg, unsigned int *val) +{ + if (IS_ERR_OR_NULL(pruss)) + return -EINVAL; + + return regmap_read(pruss->cfg_regmap, reg, val); +} + +/** + * pruss_cfg_update() - configure a PRUSS CFG sub-module register + * @pruss: the pruss instance handle + * @reg: register offset within the CFG sub-module + * @mask: bit mask to use for programming the @val + * @val: value to write + * + * Programs a given register within the PRUSS CFG sub-module + * + * Return: 0 on success, or an error code otherwise + */ +static int pruss_cfg_update(struct pruss *pruss, unsigned int reg, + unsigned int mask, unsigned int val) +{ + if (IS_ERR_OR_NULL(pruss)) + return -EINVAL; + + return regmap_update_bits(pruss->cfg_regmap, reg, mask, val); +} + +#endif /* _SOC_TI_PRUSS_H_ */ diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h index c8f2e53b911b..5bb8897724a9 100644 --- a/include/linux/pruss_driver.h +++ b/include/linux/pruss_driver.h @@ -14,6 +14,24 @@ #include #include +/* + * enum pruss_gp_mux_sel - PRUSS GPI/O Mux modes for the + * PRUSS_GPCFG0/1 registers + * + * NOTE: The below defines are the most common values, but there + * are some exceptions like on 66AK2G, where the RESERVED and MII2 + * values are interchanged. Also, this bit-field does not exist on + * AM335x SoCs + */ +enum pruss_gp_mux_sel { + PRUSS_GP_MUX_SEL_GP, + PRUSS_GP_MUX_SEL_ENDAT, + PRUSS_GP_MUX_SEL_RESERVED, + PRUSS_GP_MUX_SEL_SD, + PRUSS_GP_MUX_SEL_MII2, + PRUSS_GP_MUX_SEL_MAX, +}; + /* * enum pruss_mem - PRUSS memory range identifiers */ @@ -66,6 +84,8 @@ int pruss_request_mem_region(struct pruss *pruss, enum pruss_mem mem_id, struct pruss_mem_region *region); int pruss_release_mem_region(struct pruss *pruss, struct pruss_mem_region *region); +int pruss_cfg_get_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 *mux); +int pruss_cfg_set_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 mux); #else @@ -89,6 +109,18 @@ static inline int pruss_release_mem_region(struct pruss *pruss, return -EOPNOTSUPP; } +static inline int pruss_cfg_get_gpmux(struct pruss *pruss, + enum pruss_pru_id pru_id, u8 *mux) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int pruss_cfg_set_gpmux(struct pruss *pruss, + enum pruss_pru_id pru_id, u8 mux) +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* CONFIG_TI_PRUSS */ #endif /* _PRUSS_DRIVER_H_ */ -- cgit v1.2.3 From 0211cc1e4fbbc81853227147bf0982c47362c567 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 14 Apr 2023 10:25:42 +0530 Subject: soc: ti: pruss: Add helper functions to set GPI mode, MII_RT_event and XFR The PRUSS CFG module is represented as a syscon node and is currently managed by the PRUSS platform driver. Add easy accessor functions to set GPI mode, MII_RT event enable/disable and XFR (XIN XOUT) enable/disable to enable the PRUSS Ethernet usecase. 
These functions reuse the generic pruss_cfg_update() API function. Signed-off-by: Suman Anna Co-developed-by: Grzegorz Jaszczyk Signed-off-by: Grzegorz Jaszczyk Signed-off-by: Puranjay Mohan Reviewed-by: Roger Quadros Reviewed-by: Tony Lindgren Reviewed-by: Simon Horman Reviewed-by: Mathieu Poirier Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20230414045542.3249939-5-danishanwar@ti.com Signed-off-by: Nishanth Menon --- drivers/remoteproc/pru_rproc.c | 15 --------- drivers/soc/ti/pruss.c | 71 ++++++++++++++++++++++++++++++++++++++++++ include/linux/pruss_driver.h | 51 ++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/remoteproc/pru_rproc.c b/drivers/remoteproc/pru_rproc.c index 095f66130f48..54f5ce302e7a 100644 --- a/drivers/remoteproc/pru_rproc.c +++ b/drivers/remoteproc/pru_rproc.c @@ -81,21 +81,6 @@ enum pru_iomem { PRU_IOMEM_MAX, }; -/** - * enum pru_type - PRU core type identifier - * - * @PRU_TYPE_PRU: Programmable Real-time Unit - * @PRU_TYPE_RTU: Auxiliary Programmable Real-Time Unit - * @PRU_TYPE_TX_PRU: Transmit Programmable Real-Time Unit - * @PRU_TYPE_MAX: just keep this one at the end - */ -enum pru_type { - PRU_TYPE_PRU = 0, - PRU_TYPE_RTU, - PRU_TYPE_TX_PRU, - PRU_TYPE_MAX, -}; - /** * struct pru_private_data - device data for a PRU core * @type: type of the PRU core (PRU, RTU, Tx_PRU) diff --git a/drivers/soc/ti/pruss.c b/drivers/soc/ti/pruss.c index 2c44913c083b..7fdefee1ed87 100644 --- a/drivers/soc/ti/pruss.c +++ b/drivers/soc/ti/pruss.c @@ -213,6 +213,77 @@ int pruss_cfg_set_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 mux) } EXPORT_SYMBOL_GPL(pruss_cfg_set_gpmux); +/** + * pruss_cfg_gpimode() - set the GPI mode of the PRU + * @pruss: the pruss instance handle + * @pru_id: id of the PRU core within the PRUSS + * @mode: GPI mode to set + * + * Sets the GPI mode for a given PRU by programming the + * corresponding PRUSS_CFG_GPCFGx register + * + * Return: 0 on success, or an error code otherwise + */ +int pruss_cfg_gpimode(struct pruss *pruss, enum pruss_pru_id pru_id, + enum pruss_gpi_mode mode) +{ + if (pru_id >= PRUSS_NUM_PRUS || mode >= PRUSS_GPI_MODE_MAX) + return -EINVAL; + + return pruss_cfg_update(pruss, PRUSS_CFG_GPCFG(pru_id), + PRUSS_GPCFG_PRU_GPI_MODE_MASK, + mode << PRUSS_GPCFG_PRU_GPI_MODE_SHIFT); +} +EXPORT_SYMBOL_GPL(pruss_cfg_gpimode); + +/** + * pruss_cfg_miirt_enable() - Enable/disable MII RT Events + * @pruss: the pruss instance + * @enable: enable/disable + * + * Enable/disable the MII RT Events for the PRUSS. + * + * Return: 0 on success, or an error code otherwise + */ +int pruss_cfg_miirt_enable(struct pruss *pruss, bool enable) +{ + u32 set = enable ? PRUSS_MII_RT_EVENT_EN : 0; + + return pruss_cfg_update(pruss, PRUSS_CFG_MII_RT, + PRUSS_MII_RT_EVENT_EN, set); +} +EXPORT_SYMBOL_GPL(pruss_cfg_miirt_enable); + +/** + * pruss_cfg_xfr_enable() - Enable/disable XIN XOUT shift functionality + * @pruss: the pruss instance + * @pru_type: PRU core type identifier + * @enable: enable/disable + * + * Return: 0 on success, or an error code otherwise + */ +int pruss_cfg_xfr_enable(struct pruss *pruss, enum pru_type pru_type, + bool enable) +{ + u32 mask, set; + + switch (pru_type) { + case PRU_TYPE_PRU: + mask = PRUSS_SPP_XFER_SHIFT_EN; + break; + case PRU_TYPE_RTU: + mask = PRUSS_SPP_RTU_XFR_SHIFT_EN; + break; + default: + return -EINVAL; + } + + set = enable ? 
mask : 0; + + return pruss_cfg_update(pruss, PRUSS_CFG_SPP, mask, set); +} +EXPORT_SYMBOL_GPL(pruss_cfg_xfr_enable); + static void pruss_of_free_clk_provider(void *data) { struct device_node *clk_mux_np = data; diff --git a/include/linux/pruss_driver.h b/include/linux/pruss_driver.h index 5bb8897724a9..c9a31c567e85 100644 --- a/include/linux/pruss_driver.h +++ b/include/linux/pruss_driver.h @@ -32,6 +32,33 @@ enum pruss_gp_mux_sel { PRUSS_GP_MUX_SEL_MAX, }; +/* + * enum pruss_gpi_mode - PRUSS GPI configuration modes, used + * to program the PRUSS_GPCFG0/1 registers + */ +enum pruss_gpi_mode { + PRUSS_GPI_MODE_DIRECT, + PRUSS_GPI_MODE_PARALLEL, + PRUSS_GPI_MODE_28BIT_SHIFT, + PRUSS_GPI_MODE_MII, + PRUSS_GPI_MODE_MAX, +}; + +/** + * enum pru_type - PRU core type identifier + * + * @PRU_TYPE_PRU: Programmable Real-time Unit + * @PRU_TYPE_RTU: Auxiliary Programmable Real-Time Unit + * @PRU_TYPE_TX_PRU: Transmit Programmable Real-Time Unit + * @PRU_TYPE_MAX: just keep this one at the end + */ +enum pru_type { + PRU_TYPE_PRU, + PRU_TYPE_RTU, + PRU_TYPE_TX_PRU, + PRU_TYPE_MAX, +}; + /* * enum pruss_mem - PRUSS memory range identifiers */ @@ -86,6 +113,11 @@ int pruss_release_mem_region(struct pruss *pruss, struct pruss_mem_region *region); int pruss_cfg_get_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 *mux); int pruss_cfg_set_gpmux(struct pruss *pruss, enum pruss_pru_id pru_id, u8 mux); +int pruss_cfg_gpimode(struct pruss *pruss, enum pruss_pru_id pru_id, + enum pruss_gpi_mode mode); +int pruss_cfg_miirt_enable(struct pruss *pruss, bool enable); +int pruss_cfg_xfr_enable(struct pruss *pruss, enum pru_type pru_type, + bool enable); #else @@ -121,6 +153,25 @@ static inline int pruss_cfg_set_gpmux(struct pruss *pruss, return ERR_PTR(-EOPNOTSUPP); } +static inline int pruss_cfg_gpimode(struct pruss *pruss, + enum pruss_pru_id pru_id, + enum pruss_gpi_mode mode) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int pruss_cfg_miirt_enable(struct pruss *pruss, bool enable) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int pruss_cfg_xfr_enable(struct pruss *pruss, + enum pru_type pru_type, + bool enable); +{ + return ERR_PTR(-EOPNOTSUPP); +} + #endif /* CONFIG_TI_PRUSS */ #endif /* _PRUSS_DRIVER_H_ */ -- cgit v1.2.3 From e455ca40dbcf2cd50d1e59bf4b2752b300bcdad4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 17 May 2023 15:10:52 +0200 Subject: audit: avoid missing-prototype warnings Building with 'make W=1' reveals two function definitions without a previous prototype in the audit code: lib/compat_audit.c:32:5: error: no previous prototype for 'audit_classify_compat_syscall' [-Werror=missing-prototypes] kernel/audit.c:1813:14: error: no previous prototype for 'audit_serial' [-Werror=missing-prototypes] The first one needs a declaration from linux/audit.h but cannot include that header without causing conflicting (compat) syscall number definitions, so move the it into linux/audit_arch.h. The second one is declared conditionally based on CONFIG_AUDITSYSCALL but needed as a local function even when that option is disabled, so move the declaration out of the #ifdef block. 
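The audit_serial() half of the change illustrates a common W=1 pitfall: a function that is always defined but only declared under a config option has no previous prototype when that option is off. A minimal sketch of the before/after header arrangement, with illustrative names rather than the audit code itself:

/* before: declaration hidden behind the option, although the definition
 * below is always built -- W=1 warns when CONFIG_FOO is not set.
 *
 * #ifdef CONFIG_FOO
 * unsigned int get_serial(void);
 * #endif
 */

/* after: keep the declaration unconditional; leave only the genuinely
 * optional declarations inside the #ifdef block.
 */
unsigned int get_serial(void);

unsigned int get_serial(void)
{
        static unsigned int serial;

        return ++serial;
}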
Signed-off-by: Arnd Bergmann Signed-off-by: Paul Moore --- include/linux/audit.h | 2 -- include/linux/audit_arch.h | 2 ++ kernel/audit.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 31086a72e32a..6a3a9e122bb5 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -130,8 +130,6 @@ extern unsigned compat_dir_class[]; extern unsigned compat_chattr_class[]; extern unsigned compat_signal_class[]; -extern int audit_classify_compat_syscall(int abi, unsigned syscall); - /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ #define AUDIT_TYPE_NORMAL 1 /* a "normal" audit record */ diff --git a/include/linux/audit_arch.h b/include/linux/audit_arch.h index 8fdb1afe251a..0e34d673ef17 100644 --- a/include/linux/audit_arch.h +++ b/include/linux/audit_arch.h @@ -21,4 +21,6 @@ enum auditsc_class_t { AUDITSC_NVALS /* count */ }; +extern int audit_classify_compat_syscall(int abi, unsigned syscall); + #endif diff --git a/kernel/audit.h b/kernel/audit.h index c57b008b9914..94738bce40b2 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -259,8 +259,8 @@ extern struct tty_struct *audit_get_tty(void); extern void audit_put_tty(struct tty_struct *tty); /* audit watch/mark/tree functions */ -#ifdef CONFIG_AUDITSYSCALL extern unsigned int audit_serial(void); +#ifdef CONFIG_AUDITSYSCALL extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec64 *t, unsigned int *serial); -- cgit v1.2.3 From d86ff3333cb1d5f42d8898fb5fdb304e143c0237 Mon Sep 17 00:00:00 2001 From: Anisse Astier Date: Wed, 17 May 2023 17:38:12 +0200 Subject: efivarfs: expose used and total size When writing EFI variables, one might get errors with no other message on why it fails. Being able to see how much is used by EFI variables helps analyzing such issues. Since this is not a conventional filesystem, block size is intentionally set to 1 instead of PAGE_SIZE. x86 quirks of reserved size are taken into account; so that available and free size can be different, further helping debugging space issues. With this patch, one can see the remaining space in EFI variable storage via efivarfs, like this: $ df -h /sys/firmware/efi/efivars/ Filesystem Size Used Avail Use% Mounted on efivarfs 176K 106K 66K 62% /sys/firmware/efi/efivars Signed-off-by: Anisse Astier [ardb: - rename efi_reserved_space() to efivar_reserved_space() - whitespace/coding style tweaks] Signed-off-by: Ard Biesheuvel --- arch/x86/platform/efi/quirks.c | 8 ++++++++ drivers/firmware/efi/efi.c | 1 + drivers/firmware/efi/vars.c | 12 ++++++++++++ fs/efivarfs/super.c | 39 ++++++++++++++++++++++++++++++++++++++- include/linux/efi.h | 11 +++++++++++ 5 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index b0b848d6933a..f0cc00032751 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -114,6 +114,14 @@ void efi_delete_dummy_variable(void) EFI_VARIABLE_RUNTIME_ACCESS, 0, NULL); } +u64 efivar_reserved_space(void) +{ + if (efi_no_storage_paranoia) + return 0; + return EFI_MIN_RESERVE; +} +EXPORT_SYMBOL_GPL(efivar_reserved_space); + /* * In the nonblocking case we do not attempt to perform garbage * collection if we do not have enough free space. 
Rather, we do the diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index abeff7dc0b58..d0dfa007bffc 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -211,6 +211,7 @@ static int generic_ops_register(void) generic_ops.get_variable = efi.get_variable; generic_ops.get_next_variable = efi.get_next_variable; generic_ops.query_variable_store = efi_query_variable_store; + generic_ops.query_variable_info = efi.query_variable_info; if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE)) { generic_ops.set_variable = efi.set_variable; diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index bfc5fa6aa47b..e9dc7116daf1 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -245,3 +245,15 @@ efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor, return status; } EXPORT_SYMBOL_NS_GPL(efivar_set_variable, EFIVAR); + +efi_status_t efivar_query_variable_info(u32 attr, + u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + if (!__efivars->ops->query_variable_info) + return EFI_UNSUPPORTED; + return __efivars->ops->query_variable_info(attr, storage_space, + remaining_space, max_variable_size); +} +EXPORT_SYMBOL_NS_GPL(efivar_query_variable_info, EFIVAR); diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 482d612b716b..e028fafa04f3 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "internal.h" @@ -23,8 +24,44 @@ static void efivarfs_evict_inode(struct inode *inode) clear_inode(inode); } +static int efivarfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + const u32 attr = EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS; + u64 storage_space, remaining_space, max_variable_size; + efi_status_t status; + + status = efivar_query_variable_info(attr, &storage_space, &remaining_space, + &max_variable_size); + if (status != EFI_SUCCESS) + return efi_status_to_err(status); + + /* + * This is not a normal filesystem, so no point in pretending it has a block + * size; we declare f_bsize to 1, so that we can then report the exact value + * sent by EFI QueryVariableInfo in f_blocks and f_bfree + */ + buf->f_bsize = 1; + buf->f_namelen = NAME_MAX; + buf->f_blocks = storage_space; + buf->f_bfree = remaining_space; + buf->f_type = dentry->d_sb->s_magic; + + /* + * In f_bavail we declare the free space that the kernel will allow writing + * when the storage_paranoia x86 quirk is active. To use more, users + * should boot the kernel with efi_no_storage_paranoia. 
+ */ + if (remaining_space > efivar_reserved_space()) + buf->f_bavail = remaining_space - efivar_reserved_space(); + else + buf->f_bavail = 0; + + return 0; +} static const struct super_operations efivarfs_ops = { - .statfs = simple_statfs, + .statfs = efivarfs_statfs, .drop_inode = generic_delete_inode, .evict_inode = efivarfs_evict_inode, }; diff --git a/include/linux/efi.h b/include/linux/efi.h index 7aa62c92185f..bed3c92cbc31 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1042,6 +1042,7 @@ struct efivar_operations { efi_set_variable_t *set_variable; efi_set_variable_t *set_variable_nonblocking; efi_query_variable_store_t *query_variable_store; + efi_query_variable_info_t *query_variable_info; }; struct efivars { @@ -1049,6 +1050,12 @@ struct efivars { const struct efivar_operations *ops; }; +#ifdef CONFIG_X86 +u64 __attribute_const__ efivar_reserved_space(void); +#else +static inline u64 efivar_reserved_space(void) { return 0; } +#endif + /* * The maximum size of VariableName + Data = 1024 * Therefore, it's reasonable to save that much @@ -1087,6 +1094,10 @@ efi_status_t efivar_set_variable_locked(efi_char16_t *name, efi_guid_t *vendor, efi_status_t efivar_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data); +efi_status_t efivar_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size); + #if IS_ENABLED(CONFIG_EFI_CAPSULE_LOADER) extern bool efi_capsule_pending(int *reset_type); -- cgit v1.2.3 From 8ff1541da3908b504cb53e5384d5deae2b9c6e1a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 12 May 2023 12:24:42 +0200 Subject: fbdev: Include instead of Replace include statements for with . Fixes the coding style: if a header is available in asm/ and linux/, it is preferable to include the header from linux/. This only affects a few source files, most of which already include . Suggested-by: Sam Ravnborg Signed-off-by: Thomas Zimmermann Reviewed-by: Arnd Bergmann Reviewed-by: Sui Jingfeng Reviewed-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20230512102444.5438-6-tzimmermann@suse.de --- arch/parisc/video/fbdev.c | 3 +-- arch/sparc/video/fbdev.c | 1 - arch/x86/video/fbdev.c | 2 -- drivers/staging/sm750fb/sm750.c | 2 +- drivers/video/fbdev/core/fbcon.c | 1 - drivers/video/fbdev/core/fbmem.c | 2 -- include/linux/fb.h | 2 ++ 7 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/arch/parisc/video/fbdev.c b/arch/parisc/video/fbdev.c index 4a0ae08fc75b..137561d98246 100644 --- a/arch/parisc/video/fbdev.c +++ b/arch/parisc/video/fbdev.c @@ -5,10 +5,9 @@ * Copyright (C) 2001-2002 Thomas Bogendoerfer */ +#include #include -#include - #include