From b3c650ad9bb88ecf36b9aeacf9e7eb7478258da7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 21 Nov 2022 17:15:41 +0100
Subject: pwm: Move pwm_capture() dummy to restore order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the dummy pwm_capture(), so that the declaration order of all
dummies matches the declaration order of the real functions.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 161e91167b9c..7b7b93b6fb81 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -440,13 +440,6 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
 	return -EINVAL;
 }
 
-static inline int pwm_capture(struct pwm_device *pwm,
-			      struct pwm_capture *result,
-			      unsigned long timeout)
-{
-	return -EINVAL;
-}
-
 static inline int pwm_enable(struct pwm_device *pwm)
 {
 	might_sleep();
@@ -458,6 +451,13 @@ static inline void pwm_disable(struct pwm_device *pwm)
 	might_sleep();
 }
 
+static inline int pwm_capture(struct pwm_device *pwm,
+			      struct pwm_capture *result,
+			      unsigned long timeout)
+{
+	return -EINVAL;
+}
+
 static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From c1085957dece02bda586cbffc1328f3bca09325f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 12 Jan 2023 21:34:43 +0800
Subject: f2fs: clarify compress level bit offset

Commit 3fde13f817e2 ("f2fs: compress: support compress level") introduced
the compress level, whose macro (COMPRESS_LEVEL_OFFSET) is 8, but the
comment describing the compress level bits is wrong.

Let's fix it.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index ee0d75d9a302..1701f25117ea 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -315,7 +315,7 @@ struct f2fs_inode {
 			__u8 i_log_cluster_size;	/* log of cluster size */
 			__le16 i_compress_flag;		/* compress flag */
 						/* 0 bit: chksum flag
-						 * [10,15] bits: compress level
+						 * [8,15] bits: compress level
 						 */
 			__le32 i_extra_end[0];	/* for attribute size calculation */
 		} __packed;
-- 
cgit v1.2.3


From 19409796578c879a41e88ddbdbce50c19457658d Mon Sep 17 00:00:00 2001
From: Ahmad Fatoum <a.fatoum@pengutronix.de>
Date: Wed, 23 Nov 2022 10:55:26 +0100
Subject: include/linux/bcd.h: provide bcd_is_valid() helper

bcd2bin(0x0A) happily returns 10, despite this being an invalid BCD
value. RTC drivers converting possibly corrupted BCD timestamps might
want to validate their input before calling bcd2bin().

Provide a macro to do so. Unlike bcd2bin and bin2bcd, out-of-line
versions are not implemented. Should the macro experience enough use,
this can be retrofitted.

Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20221123095527.2771434-2-s.hauer@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/bcd.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bcd.h b/include/linux/bcd.h
index 118bea36d7d4..abbc8149178e 100644
--- a/include/linux/bcd.h
+++ b/include/linux/bcd.h
@@ -14,8 +14,12 @@
 		const_bin2bcd(x) :			\
 		_bin2bcd(x))
 
+#define bcd_is_valid(x)					\
+		const_bcd_is_valid(x)
+
 #define const_bcd2bin(x)	(((x) & 0x0f) + ((x) >> 4) * 10)
 #define const_bin2bcd(x)	((((x) / 10) << 4) + (x) % 10)
+#define const_bcd_is_valid(x)	(((x) & 0x0f) < 10 && ((x) >> 4) < 10)
 
 unsigned _bcd2bin(unsigned char val) __attribute_const__;
 unsigned char _bin2bcd(unsigned val) __attribute_const__;
-- 
cgit v1.2.3


From ffb1b4a41016295e298409c9dbcacd55680bd6d4 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@kernel.org>
Date: Fri, 10 Feb 2023 14:42:01 -0800
Subject: x86/unwind/orc: Add 'signal' field to ORC metadata

Add a 'signal' field which allows unwind hints to specify whether the
instruction pointer should be taken literally (like for most interrupts
and exceptions) rather than decremented (like for call stack return
addresses) when used to find the next ORC entry.

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/d2c5ec4d83a45b513d8fd72fab59f1a8cfa46871.1676068346.git.jpoimboe@kernel.org
---
 arch/x86/include/asm/orc_types.h       |  4 +++-
 arch/x86/include/asm/unwind_hints.h    | 10 +++++-----
 arch/x86/kernel/unwind_orc.c           |  5 ++---
 include/linux/objtool.h                | 11 +++++++----
 tools/arch/x86/include/asm/orc_types.h |  4 +++-
 tools/include/linux/objtool.h          | 11 +++++++----
 tools/objtool/orc_dump.c               |  4 ++--
 7 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 5a2baf28a1dc..1343a62106de 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -57,12 +57,14 @@ struct orc_entry {
 	unsigned	sp_reg:4;
 	unsigned	bp_reg:4;
 	unsigned	type:2;
+	unsigned	signal:1;
 	unsigned	end:1;
 #elif defined(__BIG_ENDIAN_BITFIELD)
 	unsigned	bp_reg:4;
 	unsigned	sp_reg:4;
-	unsigned	unused:5;
+	unsigned	unused:4;
 	unsigned	end:1;
+	unsigned	signal:1;
 	unsigned	type:2;
 #endif
 } __packed;
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index f66fbe6537dd..e7c71750b309 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -15,7 +15,7 @@
 	UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
 .endm
 
-.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
+.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 signal=1
 	.if \base == %rsp
 		.if \indirect
 			.set sp_reg, ORC_REG_SP_INDIRECT
@@ -45,11 +45,11 @@
 		.set type, UNWIND_HINT_TYPE_REGS
 	.endif
 
-	UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type
+	UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type signal=\signal
 .endm
 
-.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0
-	UNWIND_HINT_REGS base=\base offset=\offset partial=1
+.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 signal=1
+	UNWIND_HINT_REGS base=\base offset=\offset partial=1 signal=\signal
 .endm
 
 .macro UNWIND_HINT_FUNC
@@ -67,7 +67,7 @@
 #else
 
 #define UNWIND_HINT_FUNC \
-	UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0)
+	UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0, 0)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index cdf6c6060170..37307b40f8da 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -484,6 +484,8 @@ bool unwind_next_frame(struct unwind_state *state)
 		goto the_end;
 	}
 
+	state->signal = orc->signal;
+
 	/* Find the previous frame's stack: */
 	switch (orc->sp_reg) {
 	case ORC_REG_SP:
@@ -563,7 +565,6 @@ bool unwind_next_frame(struct unwind_state *state)
 		state->sp = sp;
 		state->regs = NULL;
 		state->prev_regs = NULL;
-		state->signal = false;
 		break;
 
 	case UNWIND_HINT_TYPE_REGS:
@@ -587,7 +588,6 @@ bool unwind_next_frame(struct unwind_state *state)
 		state->regs = (struct pt_regs *)sp;
 		state->prev_regs = NULL;
 		state->full_regs = true;
-		state->signal = true;
 		break;
 
 	case UNWIND_HINT_TYPE_REGS_PARTIAL:
@@ -604,7 +604,6 @@ bool unwind_next_frame(struct unwind_state *state)
 			state->prev_regs = state->regs;
 		state->regs = (void *)sp - IRET_FRAME_OFFSET;
 		state->full_regs = false;
-		state->signal = true;
 		break;
 
 	default:
diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 62c54ffbeeaa..9ac3df3fccf0 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -15,6 +15,7 @@ struct unwind_hint {
 	s16		sp_offset;
 	u8		sp_reg;
 	u8		type;
+	u8		signal;
 	u8		end;
 };
 #endif
@@ -49,7 +50,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)		\
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end)	\
 	"987: \n\t"						\
 	".pushsection .discard.unwind_hints\n\t"		\
 	/* struct unwind_hint */				\
@@ -57,6 +58,7 @@ struct unwind_hint {
 	".short " __stringify(sp_offset) "\n\t"			\
 	".byte " __stringify(sp_reg) "\n\t"			\
 	".byte " __stringify(type) "\n\t"			\
+	".byte " __stringify(signal) "\n\t"			\
 	".byte " __stringify(end) "\n\t"			\
 	".balign 4 \n\t"					\
 	".popsection\n\t"
@@ -129,7 +131,7 @@ struct unwind_hint {
  * the debuginfo as necessary.  It will also warn if it sees any
  * inconsistencies.
  */
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .Lunwind_hint_ip_\@:
 	.pushsection .discard.unwind_hints
 		/* struct unwind_hint */
@@ -137,6 +139,7 @@ struct unwind_hint {
 		.short \sp_offset
 		.byte \sp_reg
 		.byte \type
+		.byte \signal
 		.byte \end
 		.balign 4
 	.popsection
@@ -174,7 +177,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)	\
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \
 	"\n\t"
 #define STACK_FRAME_NON_STANDARD(func)
 #define STACK_FRAME_NON_STANDARD_FP(func)
@@ -182,7 +185,7 @@ struct unwind_hint {
 #define ASM_REACHABLE
 #else
 #define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .endm
 .macro STACK_FRAME_NON_STANDARD func:req
 .endm
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 5a2baf28a1dc..1343a62106de 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -57,12 +57,14 @@ struct orc_entry {
 	unsigned	sp_reg:4;
 	unsigned	bp_reg:4;
 	unsigned	type:2;
+	unsigned	signal:1;
 	unsigned	end:1;
 #elif defined(__BIG_ENDIAN_BITFIELD)
 	unsigned	bp_reg:4;
 	unsigned	sp_reg:4;
-	unsigned	unused:5;
+	unsigned	unused:4;
 	unsigned	end:1;
+	unsigned	signal:1;
 	unsigned	type:2;
 #endif
 } __packed;
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 62c54ffbeeaa..9ac3df3fccf0 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -15,6 +15,7 @@ struct unwind_hint {
 	s16		sp_offset;
 	u8		sp_reg;
 	u8		type;
+	u8		signal;
 	u8		end;
 };
 #endif
@@ -49,7 +50,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)		\
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end)	\
 	"987: \n\t"						\
 	".pushsection .discard.unwind_hints\n\t"		\
 	/* struct unwind_hint */				\
@@ -57,6 +58,7 @@ struct unwind_hint {
 	".short " __stringify(sp_offset) "\n\t"			\
 	".byte " __stringify(sp_reg) "\n\t"			\
 	".byte " __stringify(type) "\n\t"			\
+	".byte " __stringify(signal) "\n\t"			\
 	".byte " __stringify(end) "\n\t"			\
 	".balign 4 \n\t"					\
 	".popsection\n\t"
@@ -129,7 +131,7 @@ struct unwind_hint {
  * the debuginfo as necessary.  It will also warn if it sees any
  * inconsistencies.
  */
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .Lunwind_hint_ip_\@:
 	.pushsection .discard.unwind_hints
 		/* struct unwind_hint */
@@ -137,6 +139,7 @@ struct unwind_hint {
 		.short \sp_offset
 		.byte \sp_reg
 		.byte \type
+		.byte \signal
 		.byte \end
 		.balign 4
 	.popsection
@@ -174,7 +177,7 @@ struct unwind_hint {
 
 #ifndef __ASSEMBLY__
 
-#define UNWIND_HINT(sp_reg, sp_offset, type, end)	\
+#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \
 	"\n\t"
 #define STACK_FRAME_NON_STANDARD(func)
 #define STACK_FRAME_NON_STANDARD_FP(func)
@@ -182,7 +185,7 @@ struct unwind_hint {
 #define ASM_REACHABLE
 #else
 #define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0
 .endm
 .macro STACK_FRAME_NON_STANDARD func:req
 .endm
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 4f1211fec82c..2d8ebdcd1db3 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -211,8 +211,8 @@ int orc_dump(const char *_objname)
 
 		print_reg(orc[i].bp_reg, bswap_if_needed(&dummy_elf, orc[i].bp_offset));
 
-		printf(" type:%s end:%d\n",
-		       orc_type_name(orc[i].type), orc[i].end);
+		printf(" type:%s signal:%d end:%d\n",
+		       orc_type_name(orc[i].type), orc[i].signal, orc[i].end);
 	}
 
 	elf_end(elf);
-- 
cgit v1.2.3


From 05b8773ca33253ea562be145cf3145b05ef19f86 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Thu, 22 Dec 2022 19:33:31 +0000
Subject: mtd: ubi: block: wire-up device parent

ubiblock devices were previously only identifiable by their name, but
not connected to their parent UBI volume device e.g. in sysfs.
Properly parent ubiblock device as descendant of a UBI volume device
to reflect device model hierarchy.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/block.c | 2 +-
 drivers/mtd/ubi/kapi.c  | 1 +
 include/linux/mtd/ubi.h | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index f5d036203fe7..6a9eb2c860b5 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -419,7 +419,7 @@ int ubiblock_create(struct ubi_volume_info *vi)
 	list_add_tail(&dev->list, &ubiblock_devices);
 
 	/* Must be the last step: anyone can call file ops from now on */
-	ret = add_disk(dev->gd);
+	ret = device_add_disk(vi->dev, dev->gd, NULL);
 	if (ret)
 		goto out_remove_minor;
 
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 0fce99ff29b5..5db653eacbd4 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -79,6 +79,7 @@ void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
 	vi->name_len = vol->name_len;
 	vi->name = vol->name;
 	vi->cdev = vol->cdev.dev;
+	vi->dev = &vol->dev;
 }
 
 /**
diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h
index 7d48ea368c5e..a529347fd75b 100644
--- a/include/linux/mtd/ubi.h
+++ b/include/linux/mtd/ubi.h
@@ -110,6 +110,7 @@ struct ubi_volume_info {
 	int name_len;
 	const char *name;
 	dev_t cdev;
+	struct device *dev;
 };
 
 /**
-- 
cgit v1.2.3


From d88cbbb39b4db057feb1552de31f22c02a21b36f Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 17 Feb 2023 10:29:10 +0100
Subject: blk-mq: Reorder fields in 'struct blk_mq_tag_set'

Group some variables based on their sizes to reduce holes and avoid padding.
On x86_64, this shrinks the size of 'struct blk_mq_tag_set'
from 304 to 296 bytes.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/6f249f9b02a3490283ef0278096556de41aa0cf0.1676626130.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 779fba613bd0..dd5ce1137f04 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -473,6 +473,7 @@ enum hctx_type {
 
 /**
  * struct blk_mq_tag_set - tag set that can be shared between request queues
+ * @ops:	   Pointers to functions that implement block driver behavior.
  * @map:	   One or more ctx -> hctx mappings. One map exists for each
  *		   hardware queue type (enum hctx_type) that the driver wishes
  *		   to support. There are no restrictions on maps being of the
@@ -480,7 +481,6 @@ enum hctx_type {
  *		   types.
  * @nr_maps:	   Number of elements in the @map array. A number in the range
  *		   [1, HCTX_MAX_TYPES].
- * @ops:	   Pointers to functions that implement block driver behavior.
  * @nr_hw_queues:  Number of hardware queues supported by the block driver that
  *		   owns this data structure.
  * @queue_depth:   Number of tags per hardware queue, reserved tags included.
@@ -505,9 +505,9 @@ enum hctx_type {
  *		   (BLK_MQ_F_BLOCKING).
  */
 struct blk_mq_tag_set {
+	const struct blk_mq_ops	*ops;
 	struct blk_mq_queue_map	map[HCTX_MAX_TYPES];
 	unsigned int		nr_maps;
-	const struct blk_mq_ops	*ops;
 	unsigned int		nr_hw_queues;
 	unsigned int		queue_depth;
 	unsigned int		reserved_tags;
-- 
cgit v1.2.3


From 9e0c7efa5ea231d85c0d41693a5115b3b971717c Mon Sep 17 00:00:00 2001
From: Juhyung Park <qkrwngud825@gmail.com>
Date: Fri, 3 Feb 2023 11:40:29 +0900
Subject: block: remove more NULL checks after bdev_get_queue()

bdev_get_queue() never returns NULL. Several commits [1][2] have been made
before to remove such superfluous checks, but some still remained.

For places where bdev_get_queue() is called solely for NULL checks, it is
removed entirely.

[1] commit ec9fd2a13d74 ("blk-lib: don't check bdev_get_queue() NULL check")
[2] commit fea127b36c93 ("block: remove superfluous check for request queue in bdev_is_zoned()")

Signed-off-by: Juhyung Park <qkrwngud825@gmail.com>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
Link: https://lore.kernel.org/r/20230203024029.48260-1-qkrwngud825@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c       | 10 ----------
 include/linux/blkdev.h  |  7 +------
 kernel/trace/blktrace.c |  6 +-----
 3 files changed, 2 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 614b575be899..fce9082384d6 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -334,17 +334,12 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 {
 	void __user *argp = (void __user *)arg;
 	struct zone_report_args args;
-	struct request_queue *q;
 	struct blk_zone_report rep;
 	int ret;
 
 	if (!argp)
 		return -EINVAL;
 
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
 	if (!bdev_is_zoned(bdev))
 		return -ENOTTY;
 
@@ -391,7 +386,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 			   unsigned int cmd, unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
-	struct request_queue *q;
 	struct blk_zone_range zrange;
 	enum req_op op;
 	int ret;
@@ -399,10 +393,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
 	if (!argp)
 		return -EINVAL;
 
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
 	if (!bdev_is_zoned(bdev))
 		return -ENOTTY;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b9637d63e6f0..89dd9b02b45b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1276,12 +1276,7 @@ static inline bool bdev_nowait(struct block_device *bdev)
 
 static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	if (q)
-		return blk_queue_zoned_model(q);
-
-	return BLK_ZONED_NONE;
+	return blk_queue_zoned_model(bdev_get_queue(bdev));
 }
 
 static inline bool bdev_is_zoned(struct block_device *bdev)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 5743be559415..d5d94510afd3 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -729,14 +729,10 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop);
  **/
 int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 {
-	struct request_queue *q;
+	struct request_queue *q = bdev_get_queue(bdev);
 	int ret, start = 0;
 	char b[BDEVNAME_SIZE];
 
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
 	mutex_lock(&q->debugfs_mutex);
 
 	switch (cmd) {
-- 
cgit v1.2.3


From 8d664282a03fec09682f10252d3c785c2513691d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 17 Feb 2023 08:27:23 -0700
Subject: io_uring: rename 'in_idle' to 'in_cancel'

This better describes what it does - it's incremented when the task is
currently undergoing a cancelation operation, due to exiting or exec'ing.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  2 +-
 io_uring/io_uring.c            | 18 +++++++++---------
 io_uring/tctx.c                |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 0efe4d784358..00689c12f6ab 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -58,7 +58,7 @@ struct io_uring_task {
 
 	struct xarray			xa;
 	struct wait_queue_head		wait;
-	atomic_t			in_idle;
+	atomic_t			in_cancel;
 	atomic_t			inflight_tracked;
 	struct percpu_counter		inflight;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cbe06deb84ff..64e07df034d1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -719,7 +719,7 @@ static void io_put_task_remote(struct task_struct *task, int nr)
 	struct io_uring_task *tctx = task->io_uring;
 
 	percpu_counter_sub(&tctx->inflight, nr);
-	if (unlikely(atomic_read(&tctx->in_idle)))
+	if (unlikely(atomic_read(&tctx->in_cancel)))
 		wake_up(&tctx->wait);
 	put_task_struct_many(task, nr);
 }
@@ -1258,8 +1258,8 @@ void tctx_task_work(struct callback_head *cb)
 
 	ctx_flush_and_put(ctx, &uring_locked);
 
-	/* relaxed read is enough as only the task itself sets ->in_idle */
-	if (unlikely(atomic_read(&tctx->in_idle)))
+	/* relaxed read is enough as only the task itself sets ->in_cancel */
+	if (unlikely(atomic_read(&tctx->in_cancel)))
 		io_uring_drop_tctx_refs(current);
 
 	trace_io_uring_task_work_run(tctx, count, loops);
@@ -1291,7 +1291,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
 	/* needed for the following wake up */
 	smp_mb__after_atomic();
 
-	if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
+	if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) {
 		io_move_task_work_from_local(ctx);
 		goto put_ref;
 	}
@@ -2937,12 +2937,12 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb)
 
 	work = container_of(cb, struct io_tctx_exit, task_work);
 	/*
-	 * When @in_idle, we're in cancellation and it's racy to remove the
+	 * When @in_cancel, we're in cancellation and it's racy to remove the
 	 * node. It'll be removed by the end of cancellation, just ignore it.
 	 * tctx can be NULL if the queueing of this task_work raced with
 	 * work cancelation off the exec path.
 	 */
-	if (tctx && !atomic_read(&tctx->in_idle))
+	if (tctx && !atomic_read(&tctx->in_cancel))
 		io_uring_del_tctx_node((unsigned long)work->ctx);
 	complete(&work->completion);
 }
@@ -3210,7 +3210,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 	if (tctx->io_wq)
 		io_wq_exit_start(tctx->io_wq);
 
-	atomic_inc(&tctx->in_idle);
+	atomic_inc(&tctx->in_cancel);
 	do {
 		bool loop = false;
 
@@ -3261,9 +3261,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 	if (cancel_all) {
 		/*
 		 * We shouldn't run task_works after cancel, so just leave
-		 * ->in_idle set for normal exit.
+		 * ->in_cancel set for normal exit.
 		 */
-		atomic_dec(&tctx->in_idle);
+		atomic_dec(&tctx->in_cancel);
 		/* for exec all current's requests should be gone, kill tctx */
 		__io_uring_free(current);
 	}
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 4324b1cf1f6a..3a8d1dd97e1b 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -83,7 +83,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
 
 	xa_init(&tctx->xa);
 	init_waitqueue_head(&tctx->wait);
-	atomic_set(&tctx->in_idle, 0);
+	atomic_set(&tctx->in_cancel, 0);
 	atomic_set(&tctx->inflight_tracked, 0);
 	task->io_uring = tctx;
 	init_llist_head(&tctx->task_list);
-- 
cgit v1.2.3


From b1a37ed00d7908a991c1d0f18a8cba3c2aa99bdc Mon Sep 17 00:00:00 2001
From: Lee Jones <lee@kernel.org>
Date: Mon, 23 Jan 2023 12:39:11 +0000
Subject: HID: core: Provide new max_buffer_size attribute to over-ride the
 default

Presently, when a report is processed, its proposed size, provided by
the user of the API (as Report Size * Report Count) is compared against
the subsystem default HID_MAX_BUFFER_SIZE (16k).  However, some
low-level HID drivers allocate a reduced amount of memory to their
buffers (e.g. UHID only allocates UHID_DATA_MAX (4k) buffers), rendering
this check inadequate in some cases.

In these circumstances, if the received report ends up being smaller
than the proposed report size, the remainder of the buffer is zeroed.
That is, the space between sizeof(csize) (size of the current report)
and the rsize (size proposed i.e. Report Size * Report Count), which can
be handled up to HID_MAX_BUFFER_SIZE (16k).  Meaning that memset()
shoots straight past the end of the buffer boundary and starts zeroing
out in-use values, often resulting in calamity.

This patch introduces a new variable into 'struct hid_ll_driver' where
individual low-level drivers can over-ride the default maximum value of
HID_MAX_BUFFER_SIZE (16k) with something more sympathetic to the
interface.

Signed-off-by: Lee Jones <lee@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 32 +++++++++++++++++++++++++-------
 include/linux/hid.h    |  3 +++
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 1ee623c26c49..7647a946340c 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -256,6 +256,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 {
 	struct hid_report *report;
 	struct hid_field *field;
+	unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
 	unsigned int usages;
 	unsigned int offset;
 	unsigned int i;
@@ -286,8 +287,11 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 	offset = report->size;
 	report->size += parser->global.report_size * parser->global.report_count;
 
+	if (parser->device->ll_driver->max_buffer_size)
+		max_buffer_size = parser->device->ll_driver->max_buffer_size;
+
 	/* Total size check: Allow for possible report index byte */
-	if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) {
+	if (report->size > (max_buffer_size - 1) << 3) {
 		hid_err(parser->device, "report is too long\n");
 		return -1;
 	}
@@ -1963,6 +1967,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
 	struct hid_report_enum *report_enum = hid->report_enum + type;
 	struct hid_report *report;
 	struct hid_driver *hdrv;
+	int max_buffer_size = HID_MAX_BUFFER_SIZE;
 	u32 rsize, csize = size;
 	u8 *cdata = data;
 	int ret = 0;
@@ -1978,10 +1983,13 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 *
 
 	rsize = hid_compute_report_size(report);
 
-	if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE)
-		rsize = HID_MAX_BUFFER_SIZE - 1;
-	else if (rsize > HID_MAX_BUFFER_SIZE)
-		rsize = HID_MAX_BUFFER_SIZE;
+	if (hid->ll_driver->max_buffer_size)
+		max_buffer_size = hid->ll_driver->max_buffer_size;
+
+	if (report_enum->numbered && rsize >= max_buffer_size)
+		rsize = max_buffer_size - 1;
+	else if (rsize > max_buffer_size)
+		rsize = max_buffer_size;
 
 	if (csize < rsize) {
 		dbg_hid("report %d is too short, (%d < %d)\n", report->id,
@@ -2396,7 +2404,12 @@ int hid_hw_raw_request(struct hid_device *hdev,
 		       unsigned char reportnum, __u8 *buf,
 		       size_t len, enum hid_report_type rtype, enum hid_class_request reqtype)
 {
-	if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf)
+	unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
+
+	if (hdev->ll_driver->max_buffer_size)
+		max_buffer_size = hdev->ll_driver->max_buffer_size;
+
+	if (len < 1 || len > max_buffer_size || !buf)
 		return -EINVAL;
 
 	return hdev->ll_driver->raw_request(hdev, reportnum, buf, len,
@@ -2415,7 +2428,12 @@ EXPORT_SYMBOL_GPL(hid_hw_raw_request);
  */
 int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len)
 {
-	if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf)
+	unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE;
+
+	if (hdev->ll_driver->max_buffer_size)
+		max_buffer_size = hdev->ll_driver->max_buffer_size;
+
+	if (len < 1 || len > max_buffer_size || !buf)
 		return -EINVAL;
 
 	if (hdev->ll_driver->output_report)
diff --git a/include/linux/hid.h b/include/linux/hid.h
index eaf8ab177303..1ea8c7a3570b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -834,6 +834,7 @@ struct hid_driver {
  * @output_report: send output report to device
  * @idle: send idle request to device
  * @may_wakeup: return if device may act as a wakeup source during system-suspend
+ * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE)
  */
 struct hid_ll_driver {
 	int (*start)(struct hid_device *hdev);
@@ -859,6 +860,8 @@ struct hid_ll_driver {
 
 	int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype);
 	bool (*may_wakeup)(struct hid_device *hdev);
+
+	unsigned int max_buffer_size;
 };
 
 extern bool hid_is_usb(const struct hid_device *hdev);
-- 
cgit v1.2.3


From 250870824c1cf199b032b1ef889c8e8d69d9123a Mon Sep 17 00:00:00 2001
From: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Date: Tue, 24 Jan 2023 22:48:16 +0100
Subject: sh: intc: Avoid spurious sizeof-pointer-div warning

GCC warns about the pattern sizeof(void*)/sizeof(void), as it looks like
the abuse of a pattern to calculate the array size. This pattern appears
in the unevaluated part of the ternary operator in _INTC_ARRAY if the
parameter is NULL.

The replacement uses an alternate approach to return 0 in case of NULL
which does not generate the pattern sizeof(void*)/sizeof(void), but still
emits the warning if _INTC_ARRAY is called with a nonarray parameter.

This patch is required for successful compilation with -Werror enabled.

The idea to use _Generic for type distinction is taken from Comment #7
in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108483 by Jakub Jelinek

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Link: https://lore.kernel.org/r/619fa552-c988-35e5-b1d7-fe256c46a272@mkarcher.dialup.fu-berlin.de
Signed-off-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
---
 include/linux/sh_intc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index c255273b0281..37ad81058d6a 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -97,7 +97,10 @@ struct intc_hw_desc {
 	unsigned int nr_subgroups;
 };
 
-#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a)
+#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a,                 \
+                                 typeof(NULL):  0,           \
+                                 default:       sizeof(a)))
+#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a)
 
 #define INTC_HW_DESC(vectors, groups, mask_regs,	\
 		     prio_regs,	sense_regs, ack_regs)	\
-- 
cgit v1.2.3


From a4eecbae092759537748360299de03e434c9a956 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik <mjguzik@gmail.com>
Date: Wed, 25 Jan 2023 16:55:56 +0100
Subject: capability: add cap_isidentical

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Reviewed-by: Serge Hallyn <serge@hallyn.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/capability.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 03c2a613ad40..d3c6c2d1ff45 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -157,6 +157,16 @@ static inline bool cap_isclear(const kernel_cap_t a)
 	return true;
 }
 
+static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
+{
+	unsigned __capi;
+	CAP_FOR_EACH_U32(__capi) {
+		if (a.cap[__capi] != b.cap[__capi])
+			return false;
+	}
+	return true;
+}
+
 /*
  * Check if "a" is a subset of "set".
  * return true if ALL of the capabilities in "a" are also in "set"
-- 
cgit v1.2.3


From 6da6b1d4a7df8c35770186b53ef65d388398e139 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Tue, 21 Feb 2023 17:59:05 +0900
Subject: mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON

After a memory error happens on a clean folio, a process unexpectedly
receives SIGBUS when it accesses the error page.  This SIGBUS killing is
pointless and simply degrades the level of RAS of the system, because the
clean folio can be dropped without any data loss during memory error
handling, as we do for a clean pagecache.

When memory_failure() is called on a clean folio, try_to_unmap() is called
twice (once from split_huge_page() and once from hwpoison_user_mappings()).
The root cause of the issue is that the pte conversion to a hwpoison entry
is now done in the first call of try_to_unmap() because PageHWPoison is
already set at this point, while it is actually expected to be done in the
second call.  This behavior disturbs error handling operations such as
removing the pagecache, which results in the malfunction described above.

So convert TTU_IGNORE_HWPOISON into TTU_HWPOISON and set TTU_HWPOISON only
when we really intend to convert pte to hwpoison entry.  This can prevent
other callers of try_to_unmap() from accidentally converting to hwpoison
entries.

Link: https://lkml.kernel.org/r/20230221085905.1465385-1-naoya.horiguchi@linux.dev
Fixes: a42634a6c07d ("readahead: Use a folio in read_pages()")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/rmap.h | 2 +-
 mm/memory-failure.c  | 8 ++++----
 mm/rmap.c            | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index a4570da03e58..b87d01660412 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -94,7 +94,7 @@ enum ttu_flags {
 	TTU_SPLIT_HUGE_PMD	= 0x4,	/* split huge PMD if any */
 	TTU_IGNORE_MLOCK	= 0x8,	/* ignore mlock */
 	TTU_SYNC		= 0x10,	/* avoid racy checks with PVMW_SYNC */
-	TTU_IGNORE_HWPOISON	= 0x20,	/* corrupted page is recoverable */
+	TTU_HWPOISON		= 0x20,	/* do convert pte to hwpoison entry */
 	TTU_BATCH_FLUSH		= 0x40,	/* Batch TLB flushes where possible
 					 * and caller guarantees they will
 					 * do a final flush if necessary */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a1ede7bdce95..fae9baf3be16 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1069,7 +1069,7 @@ static int me_pagecache_dirty(struct page_state *ps, struct page *p)
  * cache and swap cache(ie. page is freshly swapped in). So it could be
  * referenced concurrently by 2 types of PTEs:
  * normal PTEs and swap PTEs. We try to handle them consistently by calling
- * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
+ * try_to_unmap(!TTU_HWPOISON) to convert the normal PTEs to swap PTEs,
  * and then
  *      - clear dirty bit to prevent IO
  *      - remove from LRU
@@ -1486,7 +1486,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 				  int flags, struct page *hpage)
 {
 	struct folio *folio = page_folio(hpage);
-	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
+	enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
 	struct address_space *mapping;
 	LIST_HEAD(tokill);
 	bool unmap_success;
@@ -1516,7 +1516,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 
 	if (PageSwapCache(p)) {
 		pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
-		ttu |= TTU_IGNORE_HWPOISON;
+		ttu &= ~TTU_HWPOISON;
 	}
 
 	/*
@@ -1531,7 +1531,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 		if (page_mkclean(hpage)) {
 			SetPageDirty(hpage);
 		} else {
-			ttu |= TTU_IGNORE_HWPOISON;
+			ttu &= ~TTU_HWPOISON;
 			pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
 				pfn);
 		}
diff --git a/mm/rmap.c b/mm/rmap.c
index 15ae24585fc4..8632e02661ac 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1602,7 +1602,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		/* Update high watermark before we lower rss */
 		update_hiwater_rss(mm);
 
-		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
+		if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) {
 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 			if (folio_test_hugetlb(folio)) {
 				hugetlb_count_sub(folio_nr_pages(folio), mm);
-- 
cgit v1.2.3


From f122a08b197d076ccf136c73fae0146875812a88 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 28 Feb 2023 11:39:09 -0800
Subject: capability: just use a 'u64' instead of a 'u32[2]' array

Back in 2008 we extended the capability bits from 32 to 64, and we did
it by extending the single 32-bit capability word from one word to an
array of two words.  It was then obfuscated by hiding the "2" behind two
macro expansions, with the reasoning being that maybe it gets extended
further some day.

That reasoning may have been valid at the time, but the last thing we
want to do is to extend the capability set any more.  And the array of
values not only causes source code oddities (with loops to deal with
it), but also results in worse code generation.  It's a lose-lose
situation.

So just change the 'u32[2]' into a 'u64' and be done with it.

We still have to deal with the fact that the user space interface is
designed around an array of these 32-bit values, but that was the case
before too, since the array layouts were different (ie user space
doesn't use an array of 32-bit values for individual capability masks,
but an array of 32-bit slices of multiple masks).

So that marshalling of data is actually simplified too, even if it does
remain somewhat obscure and odd.

This was all triggered by my reaction to the new "cap_isidentical()"
introduced recently.  By just using a saner data structure, it went from

	unsigned __capi;
	CAP_FOR_EACH_U32(__capi) {
		if (a.cap[__capi] != b.cap[__capi])
			return false;
	}
	return true;

to just being

	return a.val == b.val;

instead.  Which is rather more obvious both to humans and to compilers.

Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Casey Schaufler <casey@schaufler-ca.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Paul Moore <paul@paul-moore.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/array.c                                    |   7 +-
 include/linux/capability.h                         | 131 +++++----------------
 io_uring/fdinfo.c                                  |   4 +-
 kernel/auditsc.c                                   |   6 +-
 kernel/capability.c                                | 104 +++++++---------
 kernel/umh.c                                       |  41 +++----
 security/apparmor/policy_unpack.c                  |  40 +++++--
 security/commoncap.c                               |  49 ++++----
 .../selftests/bpf/progs/test_deny_namespace.c      |   7 +-
 9 files changed, 150 insertions(+), 239 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 49283b8103c7..9b0315d34c58 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 static void render_cap_t(struct seq_file *m, const char *header,
 			kernel_cap_t *a)
 {
-	unsigned __capi;
-
 	seq_puts(m, header);
-	CAP_FOR_EACH_U32(__capi) {
-		seq_put_hex_ll(m, NULL,
-			   a->cap[CAP_LAST_U32 - __capi], 8);
-	}
+	seq_put_hex_ll(m, NULL, a->val, 16);
 	seq_putc(m, '\n');
 }
 
diff --git a/include/linux/capability.h b/include/linux/capability.h
index d3c6c2d1ff45..0c356a517991 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -15,28 +15,25 @@
 
 #include <uapi/linux/capability.h>
 #include <linux/uidgid.h>
+#include <linux/bits.h>
 
 #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
-#define _KERNEL_CAPABILITY_U32S    _LINUX_CAPABILITY_U32S_3
 
 extern int file_caps_enabled;
 
-typedef struct kernel_cap_struct {
-	__u32 cap[_KERNEL_CAPABILITY_U32S];
-} kernel_cap_t;
+typedef struct { u64 val; } kernel_cap_t;
 
 /* same as vfs_ns_cap_data but in cpu endian and always filled completely */
 struct cpu_vfs_cap_data {
 	__u32 magic_etc;
+	kuid_t rootid;
 	kernel_cap_t permitted;
 	kernel_cap_t inheritable;
-	kuid_t rootid;
 };
 
 #define _USER_CAP_HEADER_SIZE  (sizeof(struct __user_cap_header_struct))
 #define _KERNEL_CAP_T_SIZE     (sizeof(kernel_cap_t))
 
-
 struct file;
 struct inode;
 struct dentry;
@@ -44,16 +41,6 @@ struct task_struct;
 struct user_namespace;
 struct mnt_idmap;
 
-extern const kernel_cap_t __cap_empty_set;
-extern const kernel_cap_t __cap_init_eff_set;
-
-/*
- * Internal kernel functions only
- */
-
-#define CAP_FOR_EACH_U32(__capi)  \
-	for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi)
-
 /*
  * CAP_FS_MASK and CAP_NFSD_MASKS:
  *
@@ -67,104 +54,52 @@ extern const kernel_cap_t __cap_init_eff_set;
  *   2. The security.* and trusted.* xattrs are fs-related MAC permissions
  */
 
-# define CAP_FS_MASK_B0     (CAP_TO_MASK(CAP_CHOWN)		\
-			    | CAP_TO_MASK(CAP_MKNOD)		\
-			    | CAP_TO_MASK(CAP_DAC_OVERRIDE)	\
-			    | CAP_TO_MASK(CAP_DAC_READ_SEARCH)	\
-			    | CAP_TO_MASK(CAP_FOWNER)		\
-			    | CAP_TO_MASK(CAP_FSETID))
-
-# define CAP_FS_MASK_B1     (CAP_TO_MASK(CAP_MAC_OVERRIDE))
-
-#if _KERNEL_CAPABILITY_U32S != 2
-# error Fix up hand-coded capability macro initializers
-#else /* HAND-CODED capability initializers */
+# define CAP_FS_MASK     (BIT_ULL(CAP_CHOWN)		\
+			| BIT_ULL(CAP_MKNOD)		\
+			| BIT_ULL(CAP_DAC_OVERRIDE)	\
+			| BIT_ULL(CAP_DAC_READ_SEARCH)	\
+			| BIT_ULL(CAP_FOWNER)		\
+			| BIT_ULL(CAP_FSETID)		\
+			| BIT_ULL(CAP_MAC_OVERRIDE))
+#define CAP_VALID_MASK	 (BIT_ULL(CAP_LAST_CAP+1)-1)
 
-#define CAP_LAST_U32			((_KERNEL_CAPABILITY_U32S) - 1)
-#define CAP_LAST_U32_VALID_MASK		(CAP_TO_MASK(CAP_LAST_CAP + 1) -1)
+# define CAP_EMPTY_SET    ((kernel_cap_t) { 0 })
+# define CAP_FULL_SET     ((kernel_cap_t) { CAP_VALID_MASK })
+# define CAP_FS_SET       ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) })
+# define CAP_NFSD_SET     ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) })
 
-# define CAP_EMPTY_SET    ((kernel_cap_t){{ 0, 0 }})
-# define CAP_FULL_SET     ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }})
-# define CAP_FS_SET       ((kernel_cap_t){{ CAP_FS_MASK_B0 \
-				    | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \
-				    CAP_FS_MASK_B1 } })
-# define CAP_NFSD_SET     ((kernel_cap_t){{ CAP_FS_MASK_B0 \
-				    | CAP_TO_MASK(CAP_SYS_RESOURCE), \
-				    CAP_FS_MASK_B1 } })
+# define cap_clear(c)         do { (c).val = 0; } while (0)
 
-#endif /* _KERNEL_CAPABILITY_U32S != 2 */
-
-# define cap_clear(c)         do { (c) = __cap_empty_set; } while (0)
-
-#define cap_raise(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
-#define cap_lower(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag))
-#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag))
-
-#define CAP_BOP_ALL(c, a, b, OP)                                    \
-do {                                                                \
-	unsigned __capi;                                            \
-	CAP_FOR_EACH_U32(__capi) {                                  \
-		c.cap[__capi] = a.cap[__capi] OP b.cap[__capi];     \
-	}                                                           \
-} while (0)
-
-#define CAP_UOP_ALL(c, a, OP)                                       \
-do {                                                                \
-	unsigned __capi;                                            \
-	CAP_FOR_EACH_U32(__capi) {                                  \
-		c.cap[__capi] = OP a.cap[__capi];                   \
-	}                                                           \
-} while (0)
+#define cap_raise(c, flag)  ((c).val |= BIT_ULL(flag))
+#define cap_lower(c, flag)  ((c).val &= ~BIT_ULL(flag))
+#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0)
 
 static inline kernel_cap_t cap_combine(const kernel_cap_t a,
 				       const kernel_cap_t b)
 {
-	kernel_cap_t dest;
-	CAP_BOP_ALL(dest, a, b, |);
-	return dest;
+	return (kernel_cap_t) { a.val | b.val };
 }
 
 static inline kernel_cap_t cap_intersect(const kernel_cap_t a,
 					 const kernel_cap_t b)
 {
-	kernel_cap_t dest;
-	CAP_BOP_ALL(dest, a, b, &);
-	return dest;
+	return (kernel_cap_t) { a.val & b.val };
 }
 
 static inline kernel_cap_t cap_drop(const kernel_cap_t a,
 				    const kernel_cap_t drop)
 {
-	kernel_cap_t dest;
-	CAP_BOP_ALL(dest, a, drop, &~);
-	return dest;
-}
-
-static inline kernel_cap_t cap_invert(const kernel_cap_t c)
-{
-	kernel_cap_t dest;
-	CAP_UOP_ALL(dest, c, ~);
-	return dest;
+	return (kernel_cap_t) { a.val &~ drop.val };
 }
 
 static inline bool cap_isclear(const kernel_cap_t a)
 {
-	unsigned __capi;
-	CAP_FOR_EACH_U32(__capi) {
-		if (a.cap[__capi] != 0)
-			return false;
-	}
-	return true;
+	return !a.val;
 }
 
 static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
 {
-	unsigned __capi;
-	CAP_FOR_EACH_U32(__capi) {
-		if (a.cap[__capi] != b.cap[__capi])
-			return false;
-	}
-	return true;
+	return a.val == b.val;
 }
 
 /*
@@ -176,39 +111,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b)
  */
 static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set)
 {
-	kernel_cap_t dest;
-	dest = cap_drop(a, set);
-	return cap_isclear(dest);
+	return !(a.val & ~set.val);
 }
 
 /* Used to decide between falling back on the old suser() or fsuser(). */
 
 static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a)
 {
-	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
-	return cap_drop(a, __cap_fs_set);
+	return cap_drop(a, CAP_FS_SET);
 }
 
 static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a,
 					    const kernel_cap_t permitted)
 {
-	const kernel_cap_t __cap_fs_set = CAP_FS_SET;
-	return cap_combine(a,
-			   cap_intersect(permitted, __cap_fs_set));
+	return cap_combine(a, cap_intersect(permitted, CAP_FS_SET));
 }
 
 static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a)
 {
-	const kernel_cap_t __cap_fs_set = CAP_NFSD_SET;
-	return cap_drop(a, __cap_fs_set);
+	return cap_drop(a, CAP_NFSD_SET);
 }
 
 static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a,
 					      const kernel_cap_t permitted)
 {
-	const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET;
-	return cap_combine(a,
-			   cap_intersect(permitted, __cap_nfsd_set));
+	return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET));
 }
 
 #ifdef CONFIG_MULTIUSER
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 882bd56b01ed..76c279b13aee 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -22,7 +22,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
 	struct user_namespace *uns = seq_user_ns(m);
 	struct group_info *gi;
 	kernel_cap_t cap;
-	unsigned __capi;
 	int g;
 
 	seq_printf(m, "%5d\n", id);
@@ -42,8 +41,7 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
 	}
 	seq_puts(m, "\n\tCapEff:\t");
 	cap = cred->cap_effective;
-	CAP_FOR_EACH_U32(__capi)
-		seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8);
+	seq_put_hex_ll(m, NULL, cap.val, 16);
 	seq_putc(m, '\n');
 	return 0;
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 93d0b87f3283..addeed3df15d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1295,15 +1295,11 @@ out:
 static void audit_log_cap(struct audit_buffer *ab, char *prefix,
 			  kernel_cap_t *cap)
 {
-	int i;
-
 	if (cap_isclear(*cap)) {
 		audit_log_format(ab, " %s=0", prefix);
 		return;
 	}
-	audit_log_format(ab, " %s=", prefix);
-	CAP_FOR_EACH_U32(i)
-		audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
+	audit_log_format(ab, " %s=%016llx", prefix, cap->val);
 }
 
 static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
diff --git a/kernel/capability.c b/kernel/capability.c
index 339a44dfe2f4..3e058f41df32 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -20,13 +20,6 @@
 #include <linux/user_namespace.h>
 #include <linux/uaccess.h>
 
-/*
- * Leveraged for setting/resetting capabilities
- */
-
-const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET;
-EXPORT_SYMBOL(__cap_empty_set);
-
 int file_caps_enabled = 1;
 
 static int __init file_caps_disable(char *str)
@@ -151,6 +144,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
 	pid_t pid;
 	unsigned tocopy;
 	kernel_cap_t pE, pI, pP;
+	struct __user_cap_data_struct kdata[2];
 
 	ret = cap_validate_magic(header, &tocopy);
 	if ((dataptr == NULL) || (ret != 0))
@@ -163,42 +157,46 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
 		return -EINVAL;
 
 	ret = cap_get_target_pid(pid, &pE, &pI, &pP);
-	if (!ret) {
-		struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
-		unsigned i;
-
-		for (i = 0; i < tocopy; i++) {
-			kdata[i].effective = pE.cap[i];
-			kdata[i].permitted = pP.cap[i];
-			kdata[i].inheritable = pI.cap[i];
-		}
-
-		/*
-		 * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
-		 * we silently drop the upper capabilities here. This
-		 * has the effect of making older libcap
-		 * implementations implicitly drop upper capability
-		 * bits when they perform a: capget/modify/capset
-		 * sequence.
-		 *
-		 * This behavior is considered fail-safe
-		 * behavior. Upgrading the application to a newer
-		 * version of libcap will enable access to the newer
-		 * capabilities.
-		 *
-		 * An alternative would be to return an error here
-		 * (-ERANGE), but that causes legacy applications to
-		 * unexpectedly fail; the capget/modify/capset aborts
-		 * before modification is attempted and the application
-		 * fails.
-		 */
-		if (copy_to_user(dataptr, kdata, tocopy
-				 * sizeof(struct __user_cap_data_struct))) {
-			return -EFAULT;
-		}
-	}
+	if (ret)
+		return ret;
 
-	return ret;
+	/*
+	 * Annoying legacy format with 64-bit capabilities exposed
+	 * as two sets of 32-bit fields, so we need to split the
+	 * capability values up.
+	 */
+	kdata[0].effective   = pE.val; kdata[1].effective   = pE.val >> 32;
+	kdata[0].permitted   = pP.val; kdata[1].permitted   = pP.val >> 32;
+	kdata[0].inheritable = pI.val; kdata[1].inheritable = pI.val >> 32;
+
+	/*
+	 * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S,
+	 * we silently drop the upper capabilities here. This
+	 * has the effect of making older libcap
+	 * implementations implicitly drop upper capability
+	 * bits when they perform a: capget/modify/capset
+	 * sequence.
+	 *
+	 * This behavior is considered fail-safe
+	 * behavior. Upgrading the application to a newer
+	 * version of libcap will enable access to the newer
+	 * capabilities.
+	 *
+	 * An alternative would be to return an error here
+	 * (-ERANGE), but that causes legacy applications to
+	 * unexpectedly fail; the capget/modify/capset aborts
+	 * before modification is attempted and the application
+	 * fails.
+	 */
+	if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0])))
+		return -EFAULT;
+
+	return 0;
+}
+
+static kernel_cap_t mk_kernel_cap(u32 low, u32 high)
+{
+	return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK };
 }
 
 /**
@@ -221,8 +219,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
  */
 SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 {
-	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
-	unsigned i, tocopy, copybytes;
+	struct __user_cap_data_struct kdata[2] = { { 0, }, };
+	unsigned tocopy, copybytes;
 	kernel_cap_t inheritable, permitted, effective;
 	struct cred *new;
 	int ret;
@@ -246,21 +244,9 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 	if (copy_from_user(&kdata, data, copybytes))
 		return -EFAULT;
 
-	for (i = 0; i < tocopy; i++) {
-		effective.cap[i] = kdata[i].effective;
-		permitted.cap[i] = kdata[i].permitted;
-		inheritable.cap[i] = kdata[i].inheritable;
-	}
-	while (i < _KERNEL_CAPABILITY_U32S) {
-		effective.cap[i] = 0;
-		permitted.cap[i] = 0;
-		inheritable.cap[i] = 0;
-		i++;
-	}
-
-	effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-	permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-	inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+	effective   = mk_kernel_cap(kdata[0].effective,   kdata[1].effective);
+	permitted   = mk_kernel_cap(kdata[0].permitted,   kdata[1].permitted);
+	inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable);
 
 	new = prepare_creds();
 	if (!new)
diff --git a/kernel/umh.c b/kernel/umh.c
index fbf872c624cb..2a4708277335 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -501,9 +501,9 @@ static int proc_cap_handler(struct ctl_table *table, int write,
 			 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table t;
-	unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
-	kernel_cap_t new_cap;
-	int err, i;
+	unsigned long cap_array[2];
+	kernel_cap_t new_cap, *cap;
+	int err;
 
 	if (write && (!capable(CAP_SETPCAP) ||
 		      !capable(CAP_SYS_MODULE)))
@@ -514,14 +514,16 @@ static int proc_cap_handler(struct ctl_table *table, int write,
 	 * userspace if this is a read.
 	 */
 	spin_lock(&umh_sysctl_lock);
-	for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)  {
-		if (table->data == CAP_BSET)
-			cap_array[i] = usermodehelper_bset.cap[i];
-		else if (table->data == CAP_PI)
-			cap_array[i] = usermodehelper_inheritable.cap[i];
-		else
-			BUG();
-	}
+	if (table->data == CAP_BSET)
+		cap = &usermodehelper_bset;
+	else if (table->data == CAP_PI)
+		cap = &usermodehelper_inheritable;
+	else
+		BUG();
+
+	/* Legacy format: capabilities are exposed as two 32-bit values */
+	cap_array[0] = (u32) cap->val;
+	cap_array[1] = cap->val >> 32;
 	spin_unlock(&umh_sysctl_lock);
 
 	t = *table;
@@ -535,22 +537,15 @@ static int proc_cap_handler(struct ctl_table *table, int write,
 	if (err < 0)
 		return err;
 
-	/*
-	 * convert from the sysctl array of ulongs to the kernel_cap_t
-	 * internal representation
-	 */
-	for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++)
-		new_cap.cap[i] = cap_array[i];
+	new_cap.val = (u32)cap_array[0];
+	new_cap.val += (u64)cap_array[1] << 32;
 
 	/*
 	 * Drop everything not in the new_cap (but don't add things)
 	 */
 	if (write) {
 		spin_lock(&umh_sysctl_lock);
-		if (table->data == CAP_BSET)
-			usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap);
-		if (table->data == CAP_PI)
-			usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap);
+		*cap = cap_intersect(*cap, new_cap);
 		spin_unlock(&umh_sysctl_lock);
 	}
 
@@ -561,14 +556,14 @@ struct ctl_table usermodehelper_table[] = {
 	{
 		.procname	= "bset",
 		.data		= CAP_BSET,
-		.maxlen		= _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+		.maxlen		= 2 * sizeof(unsigned long),
 		.mode		= 0600,
 		.proc_handler	= proc_cap_handler,
 	},
 	{
 		.procname	= "inheritable",
 		.data		= CAP_PI,
-		.maxlen		= _KERNEL_CAPABILITY_U32S * sizeof(unsigned long),
+		.maxlen		= 2 * sizeof(unsigned long),
 		.mode		= 0600,
 		.proc_handler	= proc_cap_handler,
 	},
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 5e9949832af6..cf2ceec40b28 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -304,6 +304,26 @@ fail:
 }
 EXPORT_SYMBOL_IF_KUNIT(aa_unpack_u64);
 
+static bool aa_unpack_cap_low(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+	u32 val;
+
+	if (!aa_unpack_u32(e, &val, name))
+		return false;
+	data->val = val;
+	return true;
+}
+
+static bool aa_unpack_cap_high(struct aa_ext *e, kernel_cap_t *data, const char *name)
+{
+	u32 val;
+
+	if (!aa_unpack_u32(e, &val, name))
+		return false;
+	data->val = (u32)data->val | ((u64)val << 32);
+	return true;
+}
+
 VISIBLE_IF_KUNIT bool aa_unpack_array(struct aa_ext *e, const char *name, u16 *size)
 {
 	void *pos = e->pos;
@@ -897,25 +917,25 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		profile->path_flags = PATH_MEDIATE_DELETED;
 
 	info = "failed to unpack profile capabilities";
-	if (!aa_unpack_u32(e, &(rules->caps.allow.cap[0]), NULL))
+	if (!aa_unpack_cap_low(e, &rules->caps.allow, NULL))
 		goto fail;
-	if (!aa_unpack_u32(e, &(rules->caps.audit.cap[0]), NULL))
+	if (!aa_unpack_cap_low(e, &rules->caps.audit, NULL))
 		goto fail;
-	if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL))
+	if (!aa_unpack_cap_low(e, &rules->caps.quiet, NULL))
 		goto fail;
-	if (!aa_unpack_u32(e, &tmpcap.cap[0], NULL))
+	if (!aa_unpack_cap_low(e, &tmpcap, NULL))
 		goto fail;
 
 	info = "failed to unpack upper profile capabilities";
 	if (aa_unpack_nameX(e, AA_STRUCT, "caps64")) {
 		/* optional upper half of 64 bit caps */
-		if (!aa_unpack_u32(e, &(rules->caps.allow.cap[1]), NULL))
+		if (!aa_unpack_cap_high(e, &rules->caps.allow, NULL))
 			goto fail;
-		if (!aa_unpack_u32(e, &(rules->caps.audit.cap[1]), NULL))
+		if (!aa_unpack_cap_high(e, &rules->caps.audit, NULL))
 			goto fail;
-		if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL))
+		if (!aa_unpack_cap_high(e, &rules->caps.quiet, NULL))
 			goto fail;
-		if (!aa_unpack_u32(e, &(tmpcap.cap[1]), NULL))
+		if (!aa_unpack_cap_high(e, &tmpcap, NULL))
 			goto fail;
 		if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
@@ -924,9 +944,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	info = "failed to unpack extended profile capabilities";
 	if (aa_unpack_nameX(e, AA_STRUCT, "capsx")) {
 		/* optional extended caps mediation mask */
-		if (!aa_unpack_u32(e, &(rules->caps.extended.cap[0]), NULL))
+		if (!aa_unpack_cap_low(e, &rules->caps.extended, NULL))
 			goto fail;
-		if (!aa_unpack_u32(e, &(rules->caps.extended.cap[1]), NULL))
+		if (!aa_unpack_cap_high(e, &rules->caps.extended, NULL))
 			goto fail;
 		if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
diff --git a/security/commoncap.c b/security/commoncap.c
index aec62db55271..5bb7d1e96277 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -589,7 +589,6 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
 					  bool *has_fcap)
 {
 	struct cred *new = bprm->cred;
-	unsigned i;
 	int ret = 0;
 
 	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
@@ -598,22 +597,17 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
 	if (caps->magic_etc & VFS_CAP_REVISION_MASK)
 		*has_fcap = true;
 
-	CAP_FOR_EACH_U32(i) {
-		__u32 permitted = caps->permitted.cap[i];
-		__u32 inheritable = caps->inheritable.cap[i];
-
-		/*
-		 * pP' = (X & fP) | (pI & fI)
-		 * The addition of pA' is handled later.
-		 */
-		new->cap_permitted.cap[i] =
-			(new->cap_bset.cap[i] & permitted) |
-			(new->cap_inheritable.cap[i] & inheritable);
+	/*
+	 * pP' = (X & fP) | (pI & fI)
+	 * The addition of pA' is handled later.
+	 */
+	new->cap_permitted.val =
+		(new->cap_bset.val & caps->permitted.val) |
+		(new->cap_inheritable.val & caps->inheritable.val);
 
-		if (permitted & ~new->cap_permitted.cap[i])
-			/* insufficient to execute correctly */
-			ret = -EPERM;
-	}
+	if (caps->permitted.val & ~new->cap_permitted.val)
+		/* insufficient to execute correctly */
+		ret = -EPERM;
 
 	/*
 	 * For legacy apps, with no internal support for recognizing they
@@ -644,7 +638,6 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
 {
 	struct inode *inode = d_backing_inode(dentry);
 	__u32 magic_etc;
-	unsigned tocopy, i;
 	int size;
 	struct vfs_ns_cap_data data, *nscaps = &data;
 	struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
@@ -677,17 +670,14 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
 	case VFS_CAP_REVISION_1:
 		if (size != XATTR_CAPS_SZ_1)
 			return -EINVAL;
-		tocopy = VFS_CAP_U32_1;
 		break;
 	case VFS_CAP_REVISION_2:
 		if (size != XATTR_CAPS_SZ_2)
 			return -EINVAL;
-		tocopy = VFS_CAP_U32_2;
 		break;
 	case VFS_CAP_REVISION_3:
 		if (size != XATTR_CAPS_SZ_3)
 			return -EINVAL;
-		tocopy = VFS_CAP_U32_3;
 		rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
 		break;
 
@@ -705,15 +695,20 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
 	if (!rootid_owns_currentns(rootvfsuid))
 		return -ENODATA;
 
-	CAP_FOR_EACH_U32(i) {
-		if (i >= tocopy)
-			break;
-		cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted);
-		cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable);
+	cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
+	cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
+
+	/*
+	 * Rev1 had just a single 32-bit word, later expanded
+	 * to a second one for the high bits
+	 */
+	if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
+		cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
+		cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
 	}
 
-	cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
-	cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
+	cpu_caps->permitted.val &= CAP_VALID_MASK;
+	cpu_caps->inheritable.val &= CAP_VALID_MASK;
 
 	cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);
 
diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
index 09ad5a4ebd1f..591104e79812 100644
--- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c
+++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
@@ -6,7 +6,7 @@
 #include <linux/capability.h>
 
 struct kernel_cap_struct {
-	__u32 cap[_LINUX_CAPABILITY_U32S_3];
+	__u64 val;
 } __attribute__((preserve_access_index));
 
 struct cred {
@@ -19,14 +19,13 @@ SEC("lsm.s/userns_create")
 int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
 {
 	struct kernel_cap_struct caps = cred->cap_effective;
-	int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN);
-	__u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN);
+	__u64 cap_mask = BIT_LL(CAP_SYS_ADMIN);
 
 	if (ret)
 		return 0;
 
 	ret = -EPERM;
-	if (caps.cap[cap_index] & cap_mask)
+	if (caps.val & cap_mask)
 		return 0;
 
 	return -EPERM;
-- 
cgit v1.2.3


From 0fb7fb713461e44b12e72c292bf90ee300f40710 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 1 Mar 2023 22:07:48 +0100
Subject: genirq/msi, platform-msi: Ensure that MSI descriptors are
 unreferenced

Miquel reported a warning in the MSI core which is triggered when
interrupts are freed via platform_msi_device_domain_free().

This code got reworked to use core functions for freeing the MSI
descriptors, but nothing took care to clear the msi_desc->irq entry, which
then triggers the warning in msi_free_msi_desc() which uses desc->irq to
validate that the descriptor has been torn down. The same issue exists in
msi_domain_populate_irqs().

Until msi_free_msi_descs() grew a warning for this case, this went
unnoticed.

Provide the counterpart of msi_domain_populate_irqs() and invoke it in
platform_msi_device_domain_free() before freeing the interrupts and MSI
descriptors and also in the error path of msi_domain_populate_irqs().

Fixes: 2f2940d16823 ("genirq/msi: Remove filter from msi_free_descs_free_range()")
Reported-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/87mt4wkwnv.ffs@tglx
---
 drivers/base/platform-msi.c |  1 +
 include/linux/msi.h         |  2 ++
 kernel/irq/msi.c            | 23 ++++++++++++++++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 5883e7634a2b..f37ad34c80ec 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -324,6 +324,7 @@ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int vir
 	struct platform_msi_priv_data *data = domain->host_data;
 
 	msi_lock_descs(data->dev);
+	msi_domain_depopulate_descs(data->dev, virq, nr_irqs);
 	irq_domain_free_irqs_common(domain, virq, nr_irqs);
 	msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1);
 	msi_unlock_descs(data->dev);
diff --git a/include/linux/msi.h b/include/linux/msi.h
index a112b913fff9..15dd71817996 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -631,6 +631,8 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
 			    int nvec, msi_alloc_info_t *args);
 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 			     int virq, int nvec, msi_alloc_info_t *args);
+void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec);
+
 struct irq_domain *
 __platform_msi_create_device_domain(struct device *dev,
 				    unsigned int nvec,
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index efd21b79bf32..d169ee0c1799 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1109,14 +1109,35 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 	return 0;
 
 fail:
-	for (--virq; virq >= virq_base; virq--)
+	for (--virq; virq >= virq_base; virq--) {
+		msi_domain_depopulate_descs(dev, virq, 1);
 		irq_domain_free_irqs_common(domain, virq, 1);
+	}
 	msi_domain_free_descs(dev, &ctrl);
 unlock:
 	msi_unlock_descs(dev);
 	return ret;
 }
 
+void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec)
+{
+	struct msi_ctrl ctrl = {
+		.domid	= MSI_DEFAULT_DOMAIN,
+		.first  = virq_base,
+		.last	= virq_base + nvec - 1,
+	};
+	struct msi_desc *desc;
+	struct xarray *xa;
+	unsigned long idx;
+
+	if (!msi_ctrl_valid(dev, &ctrl))
+		return;
+
+	xa = &dev->msi.data->__domains[ctrl.domid].store;
+	xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last)
+		desc->irq = 0;
+}
+
 /*
  * Carefully check whether the device can use reservation mode. If
  * reservation mode is enabled then the early activation will assign a
-- 
cgit v1.2.3


From 95207db8166ab95c42a03fdc5e3abd212c9987dc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 Oct 2022 03:23:49 +0900
Subject: Remove Intel compiler support

include/linux/compiler-intel.h had no update in the past 3 years.

We often forget about the third C compiler to build the kernel.

For example, commit a0a12c3ed057 ("asm goto: eradicate CC_HAS_ASM_GOTO")
only mentioned GCC and Clang.

init/Kconfig defines CC_IS_GCC and CC_IS_CLANG but not CC_IS_ICC,
and nobody has reported any issue.

I guess the Intel Compiler support is broken, and nobody is caring
about it.

Harald Arnesen pointed out ICC (classic Intel C/C++ compiler) is
deprecated:

    $ icc -v
    icc: remark #10441: The Intel(R) C++ Compiler Classic (ICC) is
    deprecated and will be removed from product release in the second half
    of 2023. The Intel(R) oneAPI DPC++/C++ Compiler (ICX) is the recommended
    compiler moving forward. Please transition to use this compiler. Use
    '-diag-disable=10441' to disable this message.
    icc version 2021.7.0 (gcc version 12.1.0 compatibility)

Arnd Bergmann provided a link to the article, "Intel C/C++ compilers
complete adoption of LLVM".

lib/zstd/common/compiler.h and lib/zstd/compress/zstd_fast.c were kept
untouched for better sync with https://github.com/facebook/zstd

Link: https://www.intel.com/content/www/us/en/developer/articles/technical/adoption-of-llvm-complete-icx.html
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/uapi/asm/cmpxchg.h      |   6 +-
 arch/ia64/include/uapi/asm/intel_intrin.h | 162 ------------------------------
 arch/ia64/include/uapi/asm/intrinsics.h   |   6 +-
 include/acpi/platform/acenv.h             |   5 +-
 include/acpi/platform/acenvex.h           |   2 +-
 include/acpi/platform/acintel.h           |  55 ----------
 include/linux/compiler-intel.h            |  34 -------
 include/linux/compiler_attributes.h       |  14 +--
 include/linux/compiler_types.h            |   2 -
 scripts/cc-version.sh                     |   2 -
 scripts/min-tool-version.sh               |   4 -
 11 files changed, 5 insertions(+), 287 deletions(-)
 delete mode 100644 arch/ia64/include/uapi/asm/intel_intrin.h
 delete mode 100644 include/acpi/platform/acintel.h
 delete mode 100644 include/linux/compiler-intel.h

(limited to 'include/linux')

diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
index ca2e02685343..259ae57570bf 100644
--- a/arch/ia64/include/uapi/asm/cmpxchg.h
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -15,11 +15,7 @@
 #include <linux/types.h>
 /* include compiler specific intrinsics */
 #include <asm/ia64regs.h>
-#ifdef __INTEL_COMPILER
-# include <asm/intel_intrin.h>
-#else
-# include <asm/gcc_intrin.h>
-#endif
+#include <asm/gcc_intrin.h>
 
 /*
  * This function doesn't exist, so you'll get a linker error if
diff --git a/arch/ia64/include/uapi/asm/intel_intrin.h b/arch/ia64/include/uapi/asm/intel_intrin.h
deleted file mode 100644
index dc1884dc54b5..000000000000
--- a/arch/ia64/include/uapi/asm/intel_intrin.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_INTEL_INTRIN_H
-#define _ASM_IA64_INTEL_INTRIN_H
-/*
- * Intel Compiler Intrinsics
- *
- * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
- * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
- * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com>
- *
- */
-#include <ia64intrin.h>
-
-#define ia64_barrier()		__memory_barrier()
-
-#define ia64_stop()	/* Nothing: As of now stop bit is generated for each
-		 	 * intrinsic
-		 	 */
-
-#define ia64_getreg		__getReg
-#define ia64_setreg		__setReg
-
-#define ia64_hint		__hint
-#define ia64_hint_pause		__hint_pause
-
-#define ia64_mux1_brcst		_m64_mux1_brcst
-#define ia64_mux1_mix		_m64_mux1_mix
-#define ia64_mux1_shuf		_m64_mux1_shuf
-#define ia64_mux1_alt		_m64_mux1_alt
-#define ia64_mux1_rev		_m64_mux1_rev
-
-#define ia64_mux1(x,v)		_m_to_int64(_m64_mux1(_m_from_int64(x), (v)))
-#define ia64_popcnt		_m64_popcnt
-#define ia64_getf_exp		__getf_exp
-#define ia64_shrp		_m64_shrp
-
-#define ia64_tpa		__tpa
-#define ia64_invala		__invala
-#define ia64_invala_gr		__invala_gr
-#define ia64_invala_fr		__invala_fr
-#define ia64_nop		__nop
-#define ia64_sum		__sum
-#define ia64_ssm		__ssm
-#define ia64_rum		__rum
-#define ia64_rsm		__rsm
-#define ia64_fc			__fc
-
-#define ia64_ldfs		__ldfs
-#define ia64_ldfd		__ldfd
-#define ia64_ldfe		__ldfe
-#define ia64_ldf8		__ldf8
-#define ia64_ldf_fill		__ldf_fill
-
-#define ia64_stfs		__stfs
-#define ia64_stfd		__stfd
-#define ia64_stfe		__stfe
-#define ia64_stf8		__stf8
-#define ia64_stf_spill		__stf_spill
-
-#define ia64_mf			__mf
-#define ia64_mfa		__mfa
-
-#define ia64_fetchadd4_acq	__fetchadd4_acq
-#define ia64_fetchadd4_rel	__fetchadd4_rel
-#define ia64_fetchadd8_acq	__fetchadd8_acq
-#define ia64_fetchadd8_rel	__fetchadd8_rel
-
-#define ia64_xchg1		_InterlockedExchange8
-#define ia64_xchg2		_InterlockedExchange16
-#define ia64_xchg4		_InterlockedExchange
-#define ia64_xchg8		_InterlockedExchange64
-
-#define ia64_cmpxchg1_rel	_InterlockedCompareExchange8_rel
-#define ia64_cmpxchg1_acq	_InterlockedCompareExchange8_acq
-#define ia64_cmpxchg2_rel	_InterlockedCompareExchange16_rel
-#define ia64_cmpxchg2_acq	_InterlockedCompareExchange16_acq
-#define ia64_cmpxchg4_rel	_InterlockedCompareExchange_rel
-#define ia64_cmpxchg4_acq	_InterlockedCompareExchange_acq
-#define ia64_cmpxchg8_rel	_InterlockedCompareExchange64_rel
-#define ia64_cmpxchg8_acq	_InterlockedCompareExchange64_acq
-
-#define __ia64_set_dbr(index, val)	\
-		__setIndReg(_IA64_REG_INDR_DBR, index, val)
-#define ia64_set_ibr(index, val)	\
-		__setIndReg(_IA64_REG_INDR_IBR, index, val)
-#define ia64_set_pkr(index, val)	\
-		__setIndReg(_IA64_REG_INDR_PKR, index, val)
-#define ia64_set_pmc(index, val)	\
-		__setIndReg(_IA64_REG_INDR_PMC, index, val)
-#define ia64_set_pmd(index, val)	\
-		__setIndReg(_IA64_REG_INDR_PMD, index, val)
-#define ia64_set_rr(index, val)		\
-		__setIndReg(_IA64_REG_INDR_RR, index, val)
-
-#define ia64_get_cpuid(index)	\
-		__getIndReg(_IA64_REG_INDR_CPUID, index)
-#define __ia64_get_dbr(index)		__getIndReg(_IA64_REG_INDR_DBR, index)
-#define ia64_get_ibr(index)		__getIndReg(_IA64_REG_INDR_IBR, index)
-#define ia64_get_pkr(index)		__getIndReg(_IA64_REG_INDR_PKR, index)
-#define ia64_get_pmc(index)		__getIndReg(_IA64_REG_INDR_PMC, index)
-#define ia64_get_pmd(index)		__getIndReg(_IA64_REG_INDR_PMD, index)
-#define ia64_get_rr(index)		__getIndReg(_IA64_REG_INDR_RR, index)
-
-#define ia64_srlz_d		__dsrlz
-#define ia64_srlz_i		__isrlz
-
-#define ia64_dv_serialize_data()
-#define ia64_dv_serialize_instruction()
-
-#define ia64_st1_rel		__st1_rel
-#define ia64_st2_rel		__st2_rel
-#define ia64_st4_rel		__st4_rel
-#define ia64_st8_rel		__st8_rel
-
-/* FIXME: need st4.rel.nta intrinsic */
-#define ia64_st4_rel_nta	__st4_rel
-
-#define ia64_ld1_acq		__ld1_acq
-#define ia64_ld2_acq		__ld2_acq
-#define ia64_ld4_acq		__ld4_acq
-#define ia64_ld8_acq		__ld8_acq
-
-#define ia64_sync_i		__synci
-#define ia64_thash		__thash
-#define ia64_ttag		__ttag
-#define ia64_itcd		__itcd
-#define ia64_itci		__itci
-#define ia64_itrd		__itrd
-#define ia64_itri		__itri
-#define ia64_ptce		__ptce
-#define ia64_ptcl		__ptcl
-#define ia64_ptcg		__ptcg
-#define ia64_ptcga		__ptcga
-#define ia64_ptri		__ptri
-#define ia64_ptrd		__ptrd
-#define ia64_dep_mi		_m64_dep_mi
-
-/* Values for lfhint in __lfetch and __lfetch_fault */
-
-#define ia64_lfhint_none	__lfhint_none
-#define ia64_lfhint_nt1		__lfhint_nt1
-#define ia64_lfhint_nt2		__lfhint_nt2
-#define ia64_lfhint_nta		__lfhint_nta
-
-#define ia64_lfetch		__lfetch
-#define ia64_lfetch_excl	__lfetch_excl
-#define ia64_lfetch_fault	__lfetch_fault
-#define ia64_lfetch_fault_excl	__lfetch_fault_excl
-
-#define ia64_intrin_local_irq_restore(x)		\
-do {							\
-	if ((x) != 0) {					\
-		ia64_ssm(IA64_PSR_I);			\
-		ia64_srlz_d();				\
-	} else {					\
-		ia64_rsm(IA64_PSR_I);			\
-	}						\
-} while (0)
-
-#define __builtin_trap()	__break(0);
-
-#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff --git a/arch/ia64/include/uapi/asm/intrinsics.h b/arch/ia64/include/uapi/asm/intrinsics.h
index a0e0a064f5b1..63f27c4ec739 100644
--- a/arch/ia64/include/uapi/asm/intrinsics.h
+++ b/arch/ia64/include/uapi/asm/intrinsics.h
@@ -14,11 +14,7 @@
 #include <linux/types.h>
 /* include compiler specific intrinsics */
 #include <asm/ia64regs.h>
-#ifdef __INTEL_COMPILER
-# include <asm/intel_intrin.h>
-#else
-# include <asm/gcc_intrin.h>
-#endif
+#include <asm/gcc_intrin.h>
 #include <asm/cmpxchg.h>
 
 #define ia64_set_rr0_to_rr4(val0, val1, val2, val3, val4)		\
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 03eb3d977075..9e4f7564201a 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -148,15 +148,12 @@
  *
  *****************************************************************************/
 
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__)
 #include <acpi/platform/acgcc.h>
 
 #elif defined(_MSC_VER)
 #include "acmsvc.h"
 
-#elif defined(__INTEL_COMPILER)
-#include <acpi/platform/acintel.h>
-
 #endif
 
 #if defined(_LINUX) || defined(__linux__)
diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h
index 3a6b1db9a984..72cc7bab469e 100644
--- a/include/acpi/platform/acenvex.h
+++ b/include/acpi/platform/acenvex.h
@@ -35,7 +35,7 @@
 
 #endif
 
-#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#if defined(__GNUC__)
 #include "acgccex.h"
 
 #elif defined(_MSC_VER)
diff --git a/include/acpi/platform/acintel.h b/include/acpi/platform/acintel.h
deleted file mode 100644
index 85b1ae86ee63..000000000000
--- a/include/acpi/platform/acintel.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
-/******************************************************************************
- *
- * Name: acintel.h - VC specific defines, etc.
- *
- * Copyright (C) 2000 - 2022, Intel Corp.
- *
- *****************************************************************************/
-
-#ifndef __ACINTEL_H__
-#define __ACINTEL_H__
-
-/*
- * Use compiler specific <stdarg.h> is a good practice for even when
- * -nostdinc is specified (i.e., ACPI_USE_STANDARD_HEADERS undefined.
- */
-#ifndef va_arg
-#include <stdarg.h>
-#endif
-
-/* Configuration specific to Intel 64-bit C compiler */
-
-#define COMPILER_DEPENDENT_INT64    __int64
-#define COMPILER_DEPENDENT_UINT64   unsigned __int64
-#define ACPI_INLINE                 __inline
-
-/*
- * Calling conventions:
- *
- * ACPI_SYSTEM_XFACE        - Interfaces to host OS (handlers, threads)
- * ACPI_EXTERNAL_XFACE      - External ACPI interfaces
- * ACPI_INTERNAL_XFACE      - Internal ACPI interfaces
- * ACPI_INTERNAL_VAR_XFACE  - Internal variable-parameter list interfaces
- */
-#define ACPI_SYSTEM_XFACE
-#define ACPI_EXTERNAL_XFACE
-#define ACPI_INTERNAL_XFACE
-#define ACPI_INTERNAL_VAR_XFACE
-
-/* remark 981 - operands evaluated in no particular order */
-#pragma warning(disable:981)
-
-/* warn C4100: unreferenced formal parameter */
-#pragma warning(disable:4100)
-
-/* warn C4127: conditional expression is constant */
-#pragma warning(disable:4127)
-
-/* warn C4706: assignment within conditional expression */
-#pragma warning(disable:4706)
-
-/* warn C4214: bit field types other than int */
-#pragma warning(disable:4214)
-
-#endif				/* __ACINTEL_H__ */
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
deleted file mode 100644
index b17f3cd18334..000000000000
--- a/include/linux/compiler-intel.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_COMPILER_TYPES_H
-#error "Please don't include <linux/compiler-intel.h> directly, include <linux/compiler.h> instead."
-#endif
-
-#ifdef __ECC
-
-/* Compiler specific definitions for Intel ECC compiler */
-
-#include <asm/intrinsics.h>
-
-/* Intel ECC compiler doesn't support gcc specific asm stmts.
- * It uses intrinsics to do the equivalent things.
- */
-
-#define barrier() __memory_barrier()
-#define barrier_data(ptr) barrier()
-
-#define RELOC_HIDE(ptr, off)					\
-  ({ unsigned long __ptr;					\
-     __ptr = (unsigned long) (ptr);				\
-    (typeof(ptr)) (__ptr + (off)); })
-
-/* This should act as an optimization barrier on var.
- * Given that this compiler does not have inline assembly, a compiler barrier
- * is the best we can do.
- */
-#define OPTIMIZER_HIDE_VAR(var) barrier()
-
-#endif
-
-/* icc has this, but it's called _bswap16 */
-#define __HAVE_BUILTIN_BSWAP16__
-#define __builtin_bswap16 _bswap16
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 4a3bd114a24f..e659cb6fded3 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -64,16 +64,10 @@
  * compiler should see some alignment anyway, when the return value is
  * massaged by 'flags = ptr & 3; ptr &= ~3;').
  *
- * Optional: not supported by icc
- *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
  * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned
  */
-#if __has_attribute(__assume_aligned__)
-# define __assume_aligned(a, ...)       __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
-#else
-# define __assume_aligned(a, ...)
-#endif
+#define __assume_aligned(a, ...)        __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
 
 /*
  * Note the long name.
@@ -85,7 +79,6 @@
 /*
  * Optional: only supported since gcc >= 9
  * Optional: not supported by clang
- * Optional: not supported by icc
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-copy-function-attribute
  */
@@ -98,7 +91,6 @@
 /*
  * Optional: not supported by gcc
  * Optional: only supported since clang >= 14.0
- * Optional: not supported by icc
  *
  * clang: https://clang.llvm.org/docs/AttributeReference.html#diagnose_as_builtin
  */
@@ -122,7 +114,6 @@
 
 /*
  * Optional: not supported by clang
- * Optional: not supported by icc
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute
  */
@@ -236,7 +227,6 @@
 /*
  * Optional: only supported since gcc >= 8
  * Optional: not supported by clang
- * Optional: not supported by icc
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute
  */
@@ -267,7 +257,6 @@
 
 /*
  * Optional: not supported by gcc.
- * Optional: not supported by icc.
  *
  * clang: https://clang.llvm.org/docs/AttributeReference.html#overloadable
  */
@@ -287,7 +276,6 @@
  * Note: the "type" argument should match any __builtin_object_size(p, type) usage.
  *
  * Optional: not supported by gcc.
- * Optional: not supported by icc.
  *
  * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size
  */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 690c7c826fbf..547ea1ff806e 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -120,8 +120,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
 /* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
-#elif defined(__INTEL_COMPILER)
-#include <linux/compiler-intel.h>
 #elif defined(__GNUC__)
 /* The above compilers also define __GNUC__, so order is important here. */
 #include <linux/compiler-gcc.h>
diff --git a/scripts/cc-version.sh b/scripts/cc-version.sh
index 2401c86fcf53..0573c92e841d 100755
--- a/scripts/cc-version.sh
+++ b/scripts/cc-version.sh
@@ -12,8 +12,6 @@ get_c_compiler_info()
 	cat <<- EOF | "$@" -E -P -x c - 2>/dev/null
 	#if defined(__clang__)
 	Clang	__clang_major__  __clang_minor__  __clang_patchlevel__
-	#elif defined(__INTEL_COMPILER)
-	ICC	__INTEL_COMPILER  __INTEL_COMPILER_UPDATE
 	#elif defined(__GNUC__)
 	GCC	__GNUC__  __GNUC_MINOR__  __GNUC_PATCHLEVEL__
 	#else
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index a814f1efb39d..20d483ec6f5f 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -19,10 +19,6 @@ binutils)
 gcc)
 	echo 5.1.0
 	;;
-icc)
-	# temporary
-	echo 16.0.3
-	;;
 llvm)
 	if [ "$SRCARCH" = s390 ]; then
 		echo 15.0.0
-- 
cgit v1.2.3


From 596ff4a09b8981790e15572e8e7bc904df5835e7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 4 Mar 2023 13:35:43 -0800
Subject: cpumask: re-introduce constant-sized cpumask optimizations

Commit aa47a7c215e7 ("lib/cpumask: deprecate nr_cpumask_bits") resulted
in the cpumask operations potentially becoming hugely less efficient,
because suddenly the cpumask was always considered to be variable-sized.

The optimization was then later added back in a limited form by commit
6f9c07be9d02 ("lib/cpumask: add FORCE_NR_CPUS config option"), but that
FORCE_NR_CPUS option is not useful in a generic kernel and more of a
special case for embedded situations with fixed hardware.

Instead, just re-introduce the optimization, with some changes.

Instead of depending on CPUMASK_OFFSTACK being false, and then always
using the full constant cpumask width, this introduces three different
cpumask "sizes":

 - the exact size (nr_cpumask_bits) remains identical to nr_cpu_ids.

   This is used for situations where we should use the exact size.

 - the "small" size (small_cpumask_bits) is the NR_CPUS constant if it
   fits in a single word and the bitmap operations thus end up able
   to trigger the "small_const_nbits()" optimizations.

   This is used for the operations that have optimized single-word
   cases that get inlined, notably the bit find and scanning functions.

 - the "large" size (large_cpumask_bits) is the NR_CPUS constant if it
   is a sufficiently small constant that makes simple "copy" and
   "clear" operations more efficient.

   This is arbitrarily set at four words or less.

As an example of this situation, without this fixed size optimization,
cpumask_clear() will generate code like

        movl    nr_cpu_ids(%rip), %edx
        addq    $63, %rdx
        shrq    $3, %rdx
        andl    $-8, %edx
        callq   memset@PLT

on x86-64, because it would calculate the "exact" number of longwords
that need to be cleared.

In contrast, with this patch, using a MAX_CPU of 64 (which is quite a
reasonable value to use), the above becomes a single

	movq $0,cpumask

instruction instead, because instead of caring to figure out exactly how
many CPU's the system has, it just knows that the cpumask will be a
single word and can just clear it all.

Note that this does end up tightening the rules a bit from the original
version in another way: operations that set bits in the cpumask are now
limited to the actual nr_cpu_ids limit, whereas we used to do the
nr_cpumask_bits thing almost everywhere in the cpumask code.

But if you just clear bits, or scan for bits, we can use the simpler
compile-time constants.

In the process, remove 'cpumask_complement()' and 'for_each_cpu_not()'
which were not useful, and which fundamentally have to be limited to
'nr_cpu_ids'.  Better remove them now than have somebody introduce use
of them later.

Of course, on x86-64 with MAXSMP there is no sane small compile-time
constant for the cpumask sizes, and we end up using the actual CPU bits,
and will generate the above kind of horrors regardless.  Please don't
use MAXSMP unless you really expect to have machines with thousands of
cores.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .clang-format           |   1 -
 arch/ia64/kernel/acpi.c |   4 +-
 include/linux/cpumask.h | 125 +++++++++++++++++++++++++++---------------------
 lib/cpumask_kunit.c     |  14 +-----
 4 files changed, 72 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/.clang-format b/.clang-format
index 2c61b4553374..d988e9fa9b26 100644
--- a/.clang-format
+++ b/.clang-format
@@ -226,7 +226,6 @@ ForEachMacros:
   - 'for_each_console_srcu'
   - 'for_each_cpu'
   - 'for_each_cpu_and'
-  - 'for_each_cpu_not'
   - 'for_each_cpu_wrap'
   - 'for_each_dapm_widgets'
   - 'for_each_dedup_cand'
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 96d13cb7c19f..15f6cfddcc08 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -783,11 +783,9 @@ __init void prefill_possible_map(void)
 
 static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 {
-	cpumask_t tmp_map;
 	int cpu;
 
-	cpumask_complement(&tmp_map, cpu_present_mask);
-	cpu = cpumask_first(&tmp_map);
+	cpu = cpumask_first_zero(cpu_present_mask);
 	if (cpu >= nr_cpu_ids)
 		return -EINVAL;
 
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 10c92bd9b807..8fbe76607965 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -50,8 +50,41 @@ static inline void set_nr_cpu_ids(unsigned int nr)
 #endif
 }
 
-/* Deprecated. Always use nr_cpu_ids. */
-#define nr_cpumask_bits	nr_cpu_ids
+/*
+ * We have several different "preferred sizes" for the cpumask
+ * operations, depending on operation.
+ *
+ * For example, the bitmap scanning and operating operations have
+ * optimized routines that work for the single-word case, but only when
+ * the size is constant. So if NR_CPUS fits in one single word, we are
+ * better off using that small constant, in order to trigger the
+ * optimized bit finding. That is 'small_cpumask_bits'.
+ *
+ * The clearing and copying operations will similarly perform better
+ * with a constant size, but we limit that size arbitrarily to four
+ * words. We call this 'large_cpumask_bits'.
+ *
+ * Finally, some operations just want the exact limit, either because
+ * they set bits or just don't have any faster fixed-sized versions. We
+ * call this just 'nr_cpumask_bits'.
+ *
+ * Note that these optional constants are always guaranteed to be at
+ * least as big as 'nr_cpu_ids' itself is, and all our cpumask
+ * allocations are at least that size (see cpumask_size()). The
+ * optimization comes from being able to potentially use a compile-time
+ * constant instead of a run-time generated exact number of CPUs.
+ */
+#if NR_CPUS <= BITS_PER_LONG
+  #define small_cpumask_bits ((unsigned int)NR_CPUS)
+  #define large_cpumask_bits ((unsigned int)NR_CPUS)
+#elif NR_CPUS <= 4*BITS_PER_LONG
+  #define small_cpumask_bits nr_cpu_ids
+  #define large_cpumask_bits ((unsigned int)NR_CPUS)
+#else
+  #define small_cpumask_bits nr_cpu_ids
+  #define large_cpumask_bits nr_cpu_ids
+#endif
+#define nr_cpumask_bits nr_cpu_ids
 
 /*
  * The following particular system cpumasks and operations manage
@@ -126,7 +159,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
  */
 static inline unsigned int cpumask_first(const struct cpumask *srcp)
 {
-	return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits);
+	return find_first_bit(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -137,7 +170,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
  */
 static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
 {
-	return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits);
+	return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -150,7 +183,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
 static inline
 unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
 {
-	return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+	return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
 }
 
 /**
@@ -161,7 +194,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
  */
 static inline unsigned int cpumask_last(const struct cpumask *srcp)
 {
-	return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits);
+	return find_last_bit(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -177,7 +210,7 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
 	/* -1 is a legal arg here. */
 	if (n != -1)
 		cpumask_check(n);
-	return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1);
+	return find_next_bit(cpumask_bits(srcp), small_cpumask_bits, n + 1);
 }
 
 /**
@@ -192,7 +225,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
 	/* -1 is a legal arg here. */
 	if (n != -1)
 		cpumask_check(n);
-	return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
+	return find_next_zero_bit(cpumask_bits(srcp), small_cpumask_bits, n+1);
 }
 
 #if NR_CPUS == 1
@@ -235,7 +268,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
 	if (n != -1)
 		cpumask_check(n);
 	return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p),
-		nr_cpumask_bits, n + 1);
+		small_cpumask_bits, n + 1);
 }
 
 /**
@@ -246,17 +279,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu(cpu, mask)				\
-	for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
-
-/**
- * for_each_cpu_not - iterate over every cpu in a complemented mask
- * @cpu: the (optionally unsigned) integer iterator
- * @mask: the cpumask pointer
- *
- * After the loop, cpu is >= nr_cpu_ids.
- */
-#define for_each_cpu_not(cpu, mask)				\
-	for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
+	for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
 
 #if NR_CPUS == 1
 static inline
@@ -290,7 +313,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu_wrap(cpu, mask, start)				\
-	for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start)
+	for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits, start)
 
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
@@ -307,7 +330,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu_and(cpu, mask1, mask2)				\
-	for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
+	for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
 
 /**
  * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
@@ -325,7 +348,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
  * After the loop, cpu is >= nr_cpu_ids.
  */
 #define for_each_cpu_andnot(cpu, mask1, mask2)				\
-	for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)
+	for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
 
 /**
  * cpumask_any_but - return a "random" in a cpumask, but not this one.
@@ -356,7 +379,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
  */
 static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
 {
-	return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu));
+	return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu));
 }
 
 /**
@@ -372,7 +395,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
 							const struct cpumask *srcp2)
 {
 	return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
-				nr_cpumask_bits, cpumask_check(cpu));
+				small_cpumask_bits, cpumask_check(cpu));
 }
 
 /**
@@ -388,7 +411,7 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
 							const struct cpumask *srcp2)
 {
 	return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
-				nr_cpumask_bits, cpumask_check(cpu));
+				small_cpumask_bits, cpumask_check(cpu));
 }
 
 /**
@@ -408,7 +431,7 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp
 	return find_nth_and_andnot_bit(cpumask_bits(srcp1),
 					cpumask_bits(srcp2),
 					cpumask_bits(srcp3),
-					nr_cpumask_bits, cpumask_check(cpu));
+					small_cpumask_bits, cpumask_check(cpu));
 }
 
 #define CPU_BITS_NONE						\
@@ -495,10 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *
 /**
  * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
  * @dstp: the cpumask pointer
+ *
+ * Note: since we set bits, we should use the tighter 'bitmap_set()' with
+ * the exact number of bits, not 'bitmap_fill()' that will fill past the
+ * end.
  */
 static inline void cpumask_setall(struct cpumask *dstp)
 {
-	bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits);
+	bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits);
 }
 
 /**
@@ -507,7 +534,7 @@ static inline void cpumask_setall(struct cpumask *dstp)
  */
 static inline void cpumask_clear(struct cpumask *dstp)
 {
-	bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits);
+	bitmap_zero(cpumask_bits(dstp), large_cpumask_bits);
 }
 
 /**
@@ -523,7 +550,7 @@ static inline bool cpumask_and(struct cpumask *dstp,
 			       const struct cpumask *src2p)
 {
 	return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p),
-				       cpumask_bits(src2p), nr_cpumask_bits);
+				       cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -536,7 +563,7 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
 			      const struct cpumask *src2p)
 {
 	bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p),
-				      cpumask_bits(src2p), nr_cpumask_bits);
+				      cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -550,7 +577,7 @@ static inline void cpumask_xor(struct cpumask *dstp,
 			       const struct cpumask *src2p)
 {
 	bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p),
-				       cpumask_bits(src2p), nr_cpumask_bits);
+				       cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -566,19 +593,7 @@ static inline bool cpumask_andnot(struct cpumask *dstp,
 				  const struct cpumask *src2p)
 {
 	return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p),
-					  cpumask_bits(src2p), nr_cpumask_bits);
-}
-
-/**
- * cpumask_complement - *dstp = ~*srcp
- * @dstp: the cpumask result
- * @srcp: the input to invert
- */
-static inline void cpumask_complement(struct cpumask *dstp,
-				      const struct cpumask *srcp)
-{
-	bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp),
-					      nr_cpumask_bits);
+					  cpumask_bits(src2p), small_cpumask_bits);
 }
 
 /**
@@ -590,7 +605,7 @@ static inline bool cpumask_equal(const struct cpumask *src1p,
 				const struct cpumask *src2p)
 {
 	return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p),
-						 nr_cpumask_bits);
+						 small_cpumask_bits);
 }
 
 /**
@@ -604,7 +619,7 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p,
 				    const struct cpumask *src3p)
 {
 	return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p),
-			       cpumask_bits(src3p), nr_cpumask_bits);
+			       cpumask_bits(src3p), small_cpumask_bits);
 }
 
 /**
@@ -616,7 +631,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p,
 				     const struct cpumask *src2p)
 {
 	return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p),
-						      nr_cpumask_bits);
+						      small_cpumask_bits);
 }
 
 /**
@@ -630,7 +645,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
 				 const struct cpumask *src2p)
 {
 	return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p),
-						  nr_cpumask_bits);
+						  small_cpumask_bits);
 }
 
 /**
@@ -639,7 +654,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
  */
 static inline bool cpumask_empty(const struct cpumask *srcp)
 {
-	return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits);
+	return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -657,7 +672,7 @@ static inline bool cpumask_full(const struct cpumask *srcp)
  */
 static inline unsigned int cpumask_weight(const struct cpumask *srcp)
 {
-	return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits);
+	return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits);
 }
 
 /**
@@ -668,7 +683,7 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
 static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
 						const struct cpumask *srcp2)
 {
-	return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
+	return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
 }
 
 /**
@@ -681,7 +696,7 @@ static inline void cpumask_shift_right(struct cpumask *dstp,
 				       const struct cpumask *srcp, int n)
 {
 	bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n,
-					       nr_cpumask_bits);
+					       small_cpumask_bits);
 }
 
 /**
@@ -705,7 +720,7 @@ static inline void cpumask_shift_left(struct cpumask *dstp,
 static inline void cpumask_copy(struct cpumask *dstp,
 				const struct cpumask *srcp)
 {
-	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits);
+	bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
 }
 
 /**
@@ -789,7 +804,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
  */
 static inline unsigned int cpumask_size(void)
 {
-	return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long);
+	return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long);
 }
 
 /*
diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c
index d1fc6ece21f3..a105e6369efc 100644
--- a/lib/cpumask_kunit.c
+++ b/lib/cpumask_kunit.c
@@ -23,16 +23,6 @@
 		KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask));	\
 	} while (0)
 
-#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask)					\
-	do {									\
-		const cpumask_t *m = (mask);					\
-		int mask_weight = cpumask_weight(m);				\
-		int cpu, iter = 0;						\
-		for_each_cpu_not(cpu, m)					\
-			iter++;							\
-		KUNIT_EXPECT_EQ_MSG((test), nr_cpu_ids - mask_weight, iter, MASK_MSG(mask));	\
-	} while (0)
-
 #define EXPECT_FOR_EACH_CPU_OP_EQ(test, op, mask1, mask2)			\
 	do {									\
 		const cpumask_t *m1 = (mask1);					\
@@ -77,7 +67,7 @@ static void test_cpumask_weight(struct kunit *test)
 	KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_weight(&mask_empty), MASK_MSG(&mask_empty));
 	KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask),
 			    MASK_MSG(cpu_possible_mask));
-	KUNIT_EXPECT_EQ_MSG(test, nr_cpumask_bits, cpumask_weight(&mask_all), MASK_MSG(&mask_all));
+	KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(&mask_all), MASK_MSG(&mask_all));
 }
 
 static void test_cpumask_first(struct kunit *test)
@@ -113,14 +103,12 @@ static void test_cpumask_next(struct kunit *test)
 static void test_cpumask_iterators(struct kunit *test)
 {
 	EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty);
-	EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty);
 	EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty);
 	EXPECT_FOR_EACH_CPU_OP_EQ(test, and, &mask_empty, &mask_empty);
 	EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, &mask_empty);
 	EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, &mask_empty, &mask_empty);
 
 	EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask);
-	EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask);
 	EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask);
 	EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, cpu_possible_mask);
 	EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, cpu_possible_mask, &mask_empty);
-- 
cgit v1.2.3


From 80c16b2b121fbc3380dbffa9bab7559acbaaa2ed Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 6 Mar 2023 17:22:04 +0200
Subject: cpumask: Fix typo nr_cpumask_size --> nr_cpumask_bits

The never-used nr_cpumask_size is just a typo, hence use the existing
redefinition that's called nr_cpumask_bits.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpumask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 8fbe76607965..ce8eb7ef2107 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -66,7 +66,7 @@ static inline void set_nr_cpu_ids(unsigned int nr)
  *
  * Finally, some operations just want the exact limit, either because
  * they set bits or just don't have any faster fixed-sized versions. We
- * call this just 'nr_cpumask_size'.
+ * call this just 'nr_cpumask_bits'.
  *
  * Note that these optional constants are always guaranteed to be at
  * least as big as 'nr_cpu_ids' itself is, and all our cpumask
-- 
cgit v1.2.3


From 63355b9884b3d1677de6bd1517cd2b8a9bf53978 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 7 Mar 2023 12:16:18 -0800
Subject: cpumask: be more careful with 'cpumask_setall()'

Commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask
optimizations") changed cpumask_setall() to use "bitmap_set()" instead
of "bitmap_fill()", because bitmap_fill() would explicitly set all the
bits of a constant sized small bitmap, and that's exactly what we don't
want: we want to only set bits up to 'nr_cpu_ids', which is what
"bitmap_set()" does.

However, Yury correctly points out that while "bitmap_set()" does indeed
only set bits up to the required bitmap size, it doesn't _clear_ bits
above that size, so the upper bits would still not have well-defined
values.

Now, none of this should really matter, since any bits set past
'nr_cpu_ids' should always be ignored in the first place.  Yes, the bit
scanning functions might return them as a result, but since users should
always consider the ">= nr_cpu_ids" condition to mean "no more bits",
that shouldn't have any actual effect (see previous commit 8ca09d5fa354
"cpumask: fix incorrect cpumask scanning result checks").

But let's just do it right, the way the code was _intended_ to work.  We
have had enough lazy code that works but bites us in the *rse later
(again, see previous commit) that there's no reason to not just do this
properly.

It turns out that "bitmap_fill()" gets this all right for the complex
case, and really only fails for the inlined optimized case that just
fills the whole word.  And while we could just fix bitmap_fill() to use
the proper last word mask, there are two issues with that:

 - the cpumask case wants to do the _optimization_ based on "NR_CPUS is
   a small constant", but then wants to do the actual bit _fill_ based
   on "nr_cpu_ids" that isn't necessarily that same constant

 - we have lots of non-cpumask users of bitmap_fill(), and while they
   hopefully don't care, and probably would want the proper semantics
   anyway ("only set bits up to the limit"), I do not want the cpumask
   changes to impact other parts

So this ends up just doing the single-word optimization by hand in the
cpumask code.  If our cpumask is fundamentally limited to a single word,
just do the proper "fill in that word" exactly.  And if it's the more
complex multi-word case, then the generic bitmap_fill() will DTRT.

This is all an example of how our bitmap function optimizations really
are somewhat broken.  They conflate the "this is the size of the bitmap"
optimizations with the actual bit(s) we want to set.

In many cases we really want to have the two be separate things:
sometimes we base our optimizations on the size of the whole bitmap ("I
know this whole bitmap fits in a single word, so I'll just use
single-word accesses"), and sometimes we base them on the bit we are
looking at ("this is just acting on bits that are in the first word, so
I'll use single-word accesses").

Notice how the end result of the two optimizations is the same, but the
way we get to it is quite different.

And all our cpumask optimization games are really about that fundamental
distinction, and we'd often really want to pass in both the "this is the
bit I'm working on" (which _can_ be a small constant but might be
variable), and "I know it's in this range even if it's variable" (based
on CONFIG_NR_CPUS).

So this cpumask_setall() implementation just makes that explicit.  It
checks the "I statically know the size is small" using the known static
size of the cpumask (which is what that 'small_cpumask_bits' is all
about), but then sets the actual bits using the exact number of cpus we
have (i.e. 'nr_cpumask_bits').

Of course, in a perfect world, the compiler would have done all the
range analysis (possibly with help from us just telling it that
"this value is always in this range"), and would do all of this for us.
But that is not the world we live in.

While we dream of that perfect world, this does that manual logic to
make it all work out.  And this was a very long explanation for a small
code change that shouldn't even matter.

Reported-by: Yury Norov <yury.norov@gmail.com>
Link: https://lore.kernel.org/lkml/ZAV9nGG9e1%2FrV+L%2F@yury-laptop/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpumask.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index ce8eb7ef2107..63d637d18e79 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -518,14 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *
 /**
  * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
  * @dstp: the cpumask pointer
- *
- * Note: since we set bits, we should use the tighter 'bitmap_set()' with
- * the eact number of bits, not 'bitmap_fill()' that will fill past the
- * end.
  */
 static inline void cpumask_setall(struct cpumask *dstp)
 {
-	bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits);
+	if (small_const_nbits(small_cpumask_bits)) {
+		cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits);
+		return;
+	}
+	bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits);
 }
 
 /**
-- 
cgit v1.2.3