From b3c650ad9bb88ecf36b9aeacf9e7eb7478258da7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 21 Nov 2022 17:15:41 +0100 Subject: pwm: Move pwm_capture() dummy to restore order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the dummy pwm_capture(), to make the declaration order of all dummies to match the declaration order of the real functions. Signed-off-by: Geert Uytterhoeven Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 161e91167b9c..7b7b93b6fb81 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -440,13 +440,6 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, return -EINVAL; } -static inline int pwm_capture(struct pwm_device *pwm, - struct pwm_capture *result, - unsigned long timeout) -{ - return -EINVAL; -} - static inline int pwm_enable(struct pwm_device *pwm) { might_sleep(); @@ -458,6 +451,13 @@ static inline void pwm_disable(struct pwm_device *pwm) might_sleep(); } +static inline int pwm_capture(struct pwm_device *pwm, + struct pwm_capture *result, + unsigned long timeout) +{ + return -EINVAL; +} + static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) { return -EINVAL; -- cgit v1.2.3 From c1085957dece02bda586cbffc1328f3bca09325f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 12 Jan 2023 21:34:43 +0800 Subject: f2fs: clarify compress level bit offset commit 3fde13f817e2 ("f2fs: compress: support compress level") introduce compress level, which macro(COMPRESS_LEVEL_OFFSET) is 8, But use wrong comment about compress level. Let's fix it. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ee0d75d9a302..1701f25117ea 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -315,7 +315,7 @@ struct f2fs_inode { __u8 i_log_cluster_size; /* log of cluster size */ __le16 i_compress_flag; /* compress flag */ /* 0 bit: chksum flag - * [10,15] bits: compress level + * [8,15] bits: compress level */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; -- cgit v1.2.3 From 19409796578c879a41e88ddbdbce50c19457658d Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Wed, 23 Nov 2022 10:55:26 +0100 Subject: include/linux/bcd.h: provide bcd_is_valid() helper bcd2bin(0x0A) happily returns 10, despite this being an invalid BCD value. RTC drivers converting possibly corrupted BCD timestamps might want to validate their input before calling bcd2bin(). Provide a macro to do so. Unlike bcd2bin and bin2bcd, out-of-line versions are not implemented. Should the macro experience enough use, this can be retrofitted. Signed-off-by: Ahmad Fatoum Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20221123095527.2771434-2-s.hauer@pengutronix.de Signed-off-by: Alexandre Belloni --- include/linux/bcd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcd.h b/include/linux/bcd.h index 118bea36d7d4..abbc8149178e 100644 --- a/include/linux/bcd.h +++ b/include/linux/bcd.h @@ -14,8 +14,12 @@ const_bin2bcd(x) : \ _bin2bcd(x)) +#define bcd_is_valid(x) \ + const_bcd_is_valid(x) + #define const_bcd2bin(x) (((x) & 0x0f) + ((x) >> 4) * 10) #define const_bin2bcd(x) ((((x) / 10) << 4) + (x) % 10) +#define const_bcd_is_valid(x) (((x) & 0x0f) < 10 && ((x) >> 4) < 10) unsigned _bcd2bin(unsigned char val) __attribute_const__; unsigned char _bin2bcd(unsigned val) __attribute_const__; -- cgit v1.2.3 From ffb1b4a41016295e298409c9dbcacd55680bd6d4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 10 Feb 2023 14:42:01 -0800 Subject: x86/unwind/orc: Add 'signal' field to ORC metadata Add a 'signal' field which allows unwind hints to specify whether the instruction pointer should be taken literally (like for most interrupts and exceptions) rather than decremented (like for call stack return addresses) when used to find the next ORC entry. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/d2c5ec4d83a45b513d8fd72fab59f1a8cfa46871.1676068346.git.jpoimboe@kernel.org --- arch/x86/include/asm/orc_types.h | 4 +++- arch/x86/include/asm/unwind_hints.h | 10 +++++----- arch/x86/kernel/unwind_orc.c | 5 ++--- include/linux/objtool.h | 11 +++++++---- tools/arch/x86/include/asm/orc_types.h | 4 +++- tools/include/linux/objtool.h | 11 +++++++---- tools/objtool/orc_dump.c | 4 ++-- 7 files changed, 29 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 5a2baf28a1dc..1343a62106de 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -57,12 +57,14 @@ struct orc_entry { unsigned sp_reg:4; unsigned bp_reg:4; unsigned type:2; + unsigned signal:1; unsigned end:1; #elif defined(__BIG_ENDIAN_BITFIELD) unsigned bp_reg:4; unsigned sp_reg:4; - unsigned unused:5; + unsigned unused:4; unsigned end:1; + unsigned signal:1; unsigned type:2; #endif } __packed; diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index f66fbe6537dd..e7c71750b309 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -15,7 +15,7 @@ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 .endm -.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 +.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 signal=1 .if \base == %rsp .if \indirect .set sp_reg, ORC_REG_SP_INDIRECT @@ -45,11 +45,11 @@ .set type, UNWIND_HINT_TYPE_REGS .endif - UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type + UNWIND_HINT sp_reg=sp_reg sp_offset=sp_offset type=type signal=\signal .endm -.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 - UNWIND_HINT_REGS base=\base offset=\offset partial=1 +.macro UNWIND_HINT_IRET_REGS base=%rsp offset=0 signal=1 + UNWIND_HINT_REGS base=\base offset=\offset partial=1 signal=\signal .endm .macro UNWIND_HINT_FUNC @@ -67,7 +67,7 @@ #else #define UNWIND_HINT_FUNC \ - UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0) + UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0, 0) #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index cdf6c6060170..37307b40f8da 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -484,6 +484,8 @@ bool unwind_next_frame(struct unwind_state *state) goto the_end; } + state->signal = orc->signal; + /* Find the previous frame's stack: */ switch (orc->sp_reg) { case ORC_REG_SP: @@ -563,7 +565,6 @@ bool unwind_next_frame(struct unwind_state *state) state->sp = sp; state->regs = NULL; state->prev_regs = NULL; - state->signal = false; break; case UNWIND_HINT_TYPE_REGS: @@ -587,7 +588,6 @@ bool unwind_next_frame(struct unwind_state *state) state->regs = (struct pt_regs *)sp; state->prev_regs = NULL; state->full_regs = true; - state->signal = true; break; case UNWIND_HINT_TYPE_REGS_PARTIAL: @@ -604,7 +604,6 @@ bool unwind_next_frame(struct unwind_state *state) state->prev_regs = state->regs; state->regs = (void *)sp - IRET_FRAME_OFFSET; state->full_regs = false; - state->signal = true; break; default: diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 62c54ffbeeaa..9ac3df3fccf0 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -15,6 +15,7 @@ struct unwind_hint { s16 sp_offset; u8 sp_reg; u8 type; + u8 signal; u8 end; }; #endif @@ -49,7 +50,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "987: \n\t" \ ".pushsection .discard.unwind_hints\n\t" \ /* struct unwind_hint */ \ @@ -57,6 +58,7 @@ struct unwind_hint { ".short " __stringify(sp_offset) "\n\t" \ ".byte " __stringify(sp_reg) "\n\t" \ ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(signal) "\n\t" \ ".byte " __stringify(end) "\n\t" \ ".balign 4 \n\t" \ ".popsection\n\t" @@ -129,7 +131,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -137,6 +139,7 @@ struct unwind_hint { .short \sp_offset .byte \sp_reg .byte \type + .byte \signal .byte \end .balign 4 .popsection @@ -174,7 +177,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) @@ -182,7 +185,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 5a2baf28a1dc..1343a62106de 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -57,12 +57,14 @@ struct orc_entry { unsigned sp_reg:4; unsigned bp_reg:4; unsigned type:2; + unsigned signal:1; unsigned end:1; #elif defined(__BIG_ENDIAN_BITFIELD) unsigned bp_reg:4; unsigned sp_reg:4; - unsigned unused:5; + unsigned unused:4; unsigned end:1; + unsigned signal:1; unsigned type:2; #endif } __packed; diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 62c54ffbeeaa..9ac3df3fccf0 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -15,6 +15,7 @@ struct unwind_hint { s16 sp_offset; u8 sp_reg; u8 type; + u8 signal; u8 end; }; #endif @@ -49,7 +50,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "987: \n\t" \ ".pushsection .discard.unwind_hints\n\t" \ /* struct unwind_hint */ \ @@ -57,6 +58,7 @@ struct unwind_hint { ".short " __stringify(sp_offset) "\n\t" \ ".byte " __stringify(sp_reg) "\n\t" \ ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(signal) "\n\t" \ ".byte " __stringify(end) "\n\t" \ ".balign 4 \n\t" \ ".popsection\n\t" @@ -129,7 +131,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -137,6 +139,7 @@ struct unwind_hint { .short \sp_offset .byte \sp_reg .byte \type + .byte \signal .byte \end .balign 4 .popsection @@ -174,7 +177,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) @@ -182,7 +185,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index 4f1211fec82c..2d8ebdcd1db3 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -211,8 +211,8 @@ int orc_dump(const char *_objname) print_reg(orc[i].bp_reg, bswap_if_needed(&dummy_elf, orc[i].bp_offset)); - printf(" type:%s end:%d\n", - orc_type_name(orc[i].type), orc[i].end); + printf(" type:%s signal:%d end:%d\n", + orc_type_name(orc[i].type), orc[i].signal, orc[i].end); } elf_end(elf); -- cgit v1.2.3 From 05b8773ca33253ea562be145cf3145b05ef19f86 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 22 Dec 2022 19:33:31 +0000 Subject: mtd: ubi: block: wire-up device parent ubiblock devices were previously only identifyable by their name, but not connected to their parent UBI volume device e.g. in sysfs. Properly parent ubiblock device as descendant of a UBI volume device to reflect device model hierachy. Signed-off-by: Daniel Golle Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/block.c | 2 +- drivers/mtd/ubi/kapi.c | 1 + include/linux/mtd/ubi.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index f5d036203fe7..6a9eb2c860b5 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -419,7 +419,7 @@ int ubiblock_create(struct ubi_volume_info *vi) list_add_tail(&dev->list, &ubiblock_devices); /* Must be the last step: anyone can call file ops from now on */ - ret = add_disk(dev->gd); + ret = device_add_disk(vi->dev, dev->gd, NULL); if (ret) goto out_remove_minor; diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 0fce99ff29b5..5db653eacbd4 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -79,6 +79,7 @@ void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, vi->name_len = vol->name_len; vi->name = vol->name; vi->cdev = vol->cdev.dev; + vi->dev = &vol->dev; } /** diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 7d48ea368c5e..a529347fd75b 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -110,6 +110,7 @@ struct ubi_volume_info { int name_len; const char *name; dev_t cdev; + struct device *dev; }; /** -- cgit v1.2.3 From d88cbbb39b4db057feb1552de31f22c02a21b36f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 17 Feb 2023 10:29:10 +0100 Subject: blk-mq: Reorder fields in 'struct blk_mq_tag_set' Group some variables based on their sizes to reduce hole and avoid padding. On x86_64, this shrinks the size of 'struct blk_mq_tag_set' from 304 to 296 bytes. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6f249f9b02a3490283ef0278096556de41aa0cf0.1676626130.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 779fba613bd0..dd5ce1137f04 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -473,6 +473,7 @@ enum hctx_type { /** * struct blk_mq_tag_set - tag set that can be shared between request queues + * @ops: Pointers to functions that implement block driver behavior. * @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the @@ -480,7 +481,6 @@ enum hctx_type { * types. * @nr_maps: Number of elements in the @map array. A number in the range * [1, HCTX_MAX_TYPES]. - * @ops: Pointers to functions that implement block driver behavior. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. @@ -505,9 +505,9 @@ enum hctx_type { * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { + const struct blk_mq_ops *ops; struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; - const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; -- cgit v1.2.3 From 9e0c7efa5ea231d85c0d41693a5115b3b971717c Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Fri, 3 Feb 2023 11:40:29 +0900 Subject: block: remove more NULL checks after bdev_get_queue() bdev_get_queue() never returns NULL. Several commits [1][2] have been made before to remove such superfluous checks, but some still remained. For places where bdev_get_queue() is called solely for NULL checks, it is removed entirely. [1] commit ec9fd2a13d74 ("blk-lib: don't check bdev_get_queue() NULL check") [2] commit fea127b36c93 ("block: remove superfluous check for request queue in bdev_is_zoned()") Signed-off-by: Juhyung Park Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20230203024029.48260-1-qkrwngud825@gmail.com Signed-off-by: Jens Axboe --- block/blk-zoned.c | 10 ---------- include/linux/blkdev.h | 7 +------ kernel/trace/blktrace.c | 6 +----- 3 files changed, 2 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 614b575be899..fce9082384d6 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -334,17 +334,12 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, { void __user *argp = (void __user *)arg; struct zone_report_args args; - struct request_queue *q; struct blk_zone_report rep; int ret; if (!argp) return -EINVAL; - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - if (!bdev_is_zoned(bdev)) return -ENOTTY; @@ -391,7 +386,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; - struct request_queue *q; struct blk_zone_range zrange; enum req_op op; int ret; @@ -399,10 +393,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, if (!argp) return -EINVAL; - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - if (!bdev_is_zoned(bdev)) return -ENOTTY; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9637d63e6f0..89dd9b02b45b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1276,12 +1276,7 @@ static inline bool bdev_nowait(struct block_device *bdev) static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_zoned_model(q); - - return BLK_ZONED_NONE; + return blk_queue_zoned_model(bdev_get_queue(bdev)); } static inline bool bdev_is_zoned(struct block_device *bdev) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 5743be559415..d5d94510afd3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -729,14 +729,10 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop); **/ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) { - struct request_queue *q; + struct request_queue *q = bdev_get_queue(bdev); int ret, start = 0; char b[BDEVNAME_SIZE]; - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - mutex_lock(&q->debugfs_mutex); switch (cmd) { -- cgit v1.2.3 From 8d664282a03fec09682f10252d3c785c2513691d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Feb 2023 08:27:23 -0700 Subject: io_uring: rename 'in_idle' to 'in_cancel' This better describes what it does - it's incremented when the task is currently undergoing a cancelation operation, due to exiting or exec'ing. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- io_uring/io_uring.c | 18 +++++++++--------- io_uring/tctx.c | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 0efe4d784358..00689c12f6ab 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -58,7 +58,7 @@ struct io_uring_task { struct xarray xa; struct wait_queue_head wait; - atomic_t in_idle; + atomic_t in_cancel; atomic_t inflight_tracked; struct percpu_counter inflight; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cbe06deb84ff..64e07df034d1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -719,7 +719,7 @@ static void io_put_task_remote(struct task_struct *task, int nr) struct io_uring_task *tctx = task->io_uring; percpu_counter_sub(&tctx->inflight, nr); - if (unlikely(atomic_read(&tctx->in_idle))) + if (unlikely(atomic_read(&tctx->in_cancel))) wake_up(&tctx->wait); put_task_struct_many(task, nr); } @@ -1258,8 +1258,8 @@ void tctx_task_work(struct callback_head *cb) ctx_flush_and_put(ctx, &uring_locked); - /* relaxed read is enough as only the task itself sets ->in_idle */ - if (unlikely(atomic_read(&tctx->in_idle))) + /* relaxed read is enough as only the task itself sets ->in_cancel */ + if (unlikely(atomic_read(&tctx->in_cancel))) io_uring_drop_tctx_refs(current); trace_io_uring_task_work_run(tctx, count, loops); @@ -1291,7 +1291,7 @@ static void io_req_local_work_add(struct io_kiocb *req) /* needed for the following wake up */ smp_mb__after_atomic(); - if (unlikely(atomic_read(&req->task->io_uring->in_idle))) { + if (unlikely(atomic_read(&req->task->io_uring->in_cancel))) { io_move_task_work_from_local(ctx); goto put_ref; } @@ -2937,12 +2937,12 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb) work = container_of(cb, struct io_tctx_exit, task_work); /* - * When @in_idle, we're in cancellation and it's racy to remove the + * When @in_cancel, we're in cancellation and it's racy to remove the * node. It'll be removed by the end of cancellation, just ignore it. * tctx can be NULL if the queueing of this task_work raced with * work cancelation off the exec path. */ - if (tctx && !atomic_read(&tctx->in_idle)) + if (tctx && !atomic_read(&tctx->in_cancel)) io_uring_del_tctx_node((unsigned long)work->ctx); complete(&work->completion); } @@ -3210,7 +3210,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) if (tctx->io_wq) io_wq_exit_start(tctx->io_wq); - atomic_inc(&tctx->in_idle); + atomic_inc(&tctx->in_cancel); do { bool loop = false; @@ -3261,9 +3261,9 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) if (cancel_all) { /* * We shouldn't run task_works after cancel, so just leave - * ->in_idle set for normal exit. + * ->in_cancel set for normal exit. */ - atomic_dec(&tctx->in_idle); + atomic_dec(&tctx->in_cancel); /* for exec all current's requests should be gone, kill tctx */ __io_uring_free(current); } diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 4324b1cf1f6a..3a8d1dd97e1b 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -83,7 +83,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task, xa_init(&tctx->xa); init_waitqueue_head(&tctx->wait); - atomic_set(&tctx->in_idle, 0); + atomic_set(&tctx->in_cancel, 0); atomic_set(&tctx->inflight_tracked, 0); task->io_uring = tctx; init_llist_head(&tctx->task_list); -- cgit v1.2.3 From b1a37ed00d7908a991c1d0f18a8cba3c2aa99bdc Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 23 Jan 2023 12:39:11 +0000 Subject: HID: core: Provide new max_buffer_size attribute to over-ride the default Presently, when a report is processed, its proposed size, provided by the user of the API (as Report Size * Report Count) is compared against the subsystem default HID_MAX_BUFFER_SIZE (16k). However, some low-level HID drivers allocate a reduced amount of memory to their buffers (e.g. UHID only allocates UHID_DATA_MAX (4k) buffers), rending this check inadequate in some cases. In these circumstances, if the received report ends up being smaller than the proposed report size, the remainder of the buffer is zeroed. That is, the space between sizeof(csize) (size of the current report) and the rsize (size proposed i.e. Report Size * Report Count), which can be handled up to HID_MAX_BUFFER_SIZE (16k). Meaning that memset() shoots straight past the end of the buffer boundary and starts zeroing out in-use values, often resulting in calamity. This patch introduces a new variable into 'struct hid_ll_driver' where individual low-level drivers can over-ride the default maximum value of HID_MAX_BUFFER_SIZE (16k) with something more sympathetic to the interface. Signed-off-by: Lee Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 32 +++++++++++++++++++++++++------- include/linux/hid.h | 3 +++ 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 1ee623c26c49..7647a946340c 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -256,6 +256,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign { struct hid_report *report; struct hid_field *field; + unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; unsigned int usages; unsigned int offset; unsigned int i; @@ -286,8 +287,11 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign offset = report->size; report->size += parser->global.report_size * parser->global.report_count; + if (parser->device->ll_driver->max_buffer_size) + max_buffer_size = parser->device->ll_driver->max_buffer_size; + /* Total size check: Allow for possible report index byte */ - if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) { + if (report->size > (max_buffer_size - 1) << 3) { hid_err(parser->device, "report is too long\n"); return -1; } @@ -1963,6 +1967,7 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_report *report; struct hid_driver *hdrv; + int max_buffer_size = HID_MAX_BUFFER_SIZE; u32 rsize, csize = size; u8 *cdata = data; int ret = 0; @@ -1978,10 +1983,13 @@ int hid_report_raw_event(struct hid_device *hid, enum hid_report_type type, u8 * rsize = hid_compute_report_size(report); - if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) - rsize = HID_MAX_BUFFER_SIZE - 1; - else if (rsize > HID_MAX_BUFFER_SIZE) - rsize = HID_MAX_BUFFER_SIZE; + if (hid->ll_driver->max_buffer_size) + max_buffer_size = hid->ll_driver->max_buffer_size; + + if (report_enum->numbered && rsize >= max_buffer_size) + rsize = max_buffer_size - 1; + else if (rsize > max_buffer_size) + rsize = max_buffer_size; if (csize < rsize) { dbg_hid("report %d is too short, (%d < %d)\n", report->id, @@ -2396,7 +2404,12 @@ int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype) { - if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf) + unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; + + if (hdev->ll_driver->max_buffer_size) + max_buffer_size = hdev->ll_driver->max_buffer_size; + + if (len < 1 || len > max_buffer_size || !buf) return -EINVAL; return hdev->ll_driver->raw_request(hdev, reportnum, buf, len, @@ -2415,7 +2428,12 @@ EXPORT_SYMBOL_GPL(hid_hw_raw_request); */ int hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len) { - if (len < 1 || len > HID_MAX_BUFFER_SIZE || !buf) + unsigned int max_buffer_size = HID_MAX_BUFFER_SIZE; + + if (hdev->ll_driver->max_buffer_size) + max_buffer_size = hdev->ll_driver->max_buffer_size; + + if (len < 1 || len > max_buffer_size || !buf) return -EINVAL; if (hdev->ll_driver->output_report) diff --git a/include/linux/hid.h b/include/linux/hid.h index eaf8ab177303..1ea8c7a3570b 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -834,6 +834,7 @@ struct hid_driver { * @output_report: send output report to device * @idle: send idle request to device * @may_wakeup: return if device may act as a wakeup source during system-suspend + * @max_buffer_size: over-ride maximum data buffer size (default: HID_MAX_BUFFER_SIZE) */ struct hid_ll_driver { int (*start)(struct hid_device *hdev); @@ -859,6 +860,8 @@ struct hid_ll_driver { int (*idle)(struct hid_device *hdev, int report, int idle, int reqtype); bool (*may_wakeup)(struct hid_device *hdev); + + unsigned int max_buffer_size; }; extern bool hid_is_usb(const struct hid_device *hdev); -- cgit v1.2.3 From 250870824c1cf199b032b1ef889c8e8d69d9123a Mon Sep 17 00:00:00 2001 From: Michael Karcher Date: Tue, 24 Jan 2023 22:48:16 +0100 Subject: sh: intc: Avoid spurious sizeof-pointer-div warning GCC warns about the pattern sizeof(void*)/sizeof(void), as it looks like the abuse of a pattern to calculate the array size. This pattern appears in the unevaluated part of the ternary operator in _INTC_ARRAY if the parameter is NULL. The replacement uses an alternate approach to return 0 in case of NULL which does not generate the pattern sizeof(void*)/sizeof(void), but still emits the warning if _INTC_ARRAY is called with a nonarray parameter. This patch is required for successful compilation with -Werror enabled. The idea to use _Generic for type distinction is taken from Comment #7 in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108483 by Jakub Jelinek Signed-off-by: Michael Karcher Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/619fa552-c988-35e5-b1d7-fe256c46a272@mkarcher.dialup.fu-berlin.de Signed-off-by: John Paul Adrian Glaubitz --- include/linux/sh_intc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index c255273b0281..37ad81058d6a 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -97,7 +97,10 @@ struct intc_hw_desc { unsigned int nr_subgroups; }; -#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a) +#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a, \ + typeof(NULL): 0, \ + default: sizeof(a))) +#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a) #define INTC_HW_DESC(vectors, groups, mask_regs, \ prio_regs, sense_regs, ack_regs) \ -- cgit v1.2.3 From a4eecbae092759537748360299de03e434c9a956 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 25 Jan 2023 16:55:56 +0100 Subject: capability: add cap_isidentical Signed-off-by: Mateusz Guzik Reviewed-by: Serge Hallyn Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/capability.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 03c2a613ad40..d3c6c2d1ff45 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -157,6 +157,16 @@ static inline bool cap_isclear(const kernel_cap_t a) return true; } +static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) +{ + unsigned __capi; + CAP_FOR_EACH_U32(__capi) { + if (a.cap[__capi] != b.cap[__capi]) + return false; + } + return true; +} + /* * Check if "a" is a subset of "set". * return true if ALL of the capabilities in "a" are also in "set" -- cgit v1.2.3 From 6da6b1d4a7df8c35770186b53ef65d388398e139 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 21 Feb 2023 17:59:05 +0900 Subject: mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON After a memory error happens on a clean folio, a process unexpectedly receives SIGBUS when it accesses the error page. This SIGBUS killing is pointless and simply degrades the level of RAS of the system, because the clean folio can be dropped without any data lost on memory error handling as we do for a clean pagecache. When memory_failure() is called on a clean folio, try_to_unmap() is called twice (one from split_huge_page() and one from hwpoison_user_mappings()). The root cause of the issue is that pte conversion to hwpoisoned entry is now done in the first call of try_to_unmap() because PageHWPoison is already set at this point, while it's actually expected to be done in the second call. This behavior disturbs the error handling operation like removing pagecache, which results in the malfunction described above. So convert TTU_IGNORE_HWPOISON into TTU_HWPOISON and set TTU_HWPOISON only when we really intend to convert pte to hwpoison entry. This can prevent other callers of try_to_unmap() from accidentally converting to hwpoison entries. Link: https://lkml.kernel.org/r/20230221085905.1465385-1-naoya.horiguchi@linux.dev Fixes: a42634a6c07d ("readahead: Use a folio in read_pages()") Signed-off-by: Naoya Horiguchi Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 +- mm/memory-failure.c | 8 ++++---- mm/rmap.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a4570da03e58..b87d01660412 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -94,7 +94,7 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ - TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ + TTU_HWPOISON = 0x20, /* do convert pte to hwpoison entry */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index a1ede7bdce95..fae9baf3be16 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1069,7 +1069,7 @@ static int me_pagecache_dirty(struct page_state *ps, struct page *p) * cache and swap cache(ie. page is freshly swapped in). So it could be * referenced concurrently by 2 types of PTEs: * normal PTEs and swap PTEs. We try to handle them consistently by calling - * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs, + * try_to_unmap(!TTU_HWPOISON) to convert the normal PTEs to swap PTEs, * and then * - clear dirty bit to prevent IO * - remove from LRU @@ -1486,7 +1486,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, int flags, struct page *hpage) { struct folio *folio = page_folio(hpage); - enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC; + enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; struct address_space *mapping; LIST_HEAD(tokill); bool unmap_success; @@ -1516,7 +1516,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, if (PageSwapCache(p)) { pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); - ttu |= TTU_IGNORE_HWPOISON; + ttu &= ~TTU_HWPOISON; } /* @@ -1531,7 +1531,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, if (page_mkclean(hpage)) { SetPageDirty(hpage); } else { - ttu |= TTU_IGNORE_HWPOISON; + ttu &= ~TTU_HWPOISON; pr_info("%#lx: corrupted page was clean: dropped without side effects\n", pfn); } diff --git a/mm/rmap.c b/mm/rmap.c index 15ae24585fc4..8632e02661ac 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1602,7 +1602,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* Update high watermark before we lower rss */ update_hiwater_rss(mm); - if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) { + if (PageHWPoison(subpage) && (flags & TTU_HWPOISON)) { pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); if (folio_test_hugetlb(folio)) { hugetlb_count_sub(folio_nr_pages(folio), mm); -- cgit v1.2.3 From f122a08b197d076ccf136c73fae0146875812a88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 28 Feb 2023 11:39:09 -0800 Subject: capability: just use a 'u64' instead of a 'u32[2]' array Back in 2008 we extended the capability bits from 32 to 64, and we did it by extending the single 32-bit capability word from one word to an array of two words. It was then obfuscated by hiding the "2" behind two macro expansions, with the reasoning being that maybe it gets extended further some day. That reasoning may have been valid at the time, but the last thing we want to do is to extend the capability set any more. And the array of values not only causes source code oddities (with loops to deal with it), but also results in worse code generation. It's a lose-lose situation. So just change the 'u32[2]' into a 'u64' and be done with it. We still have to deal with the fact that the user space interface is designed around an array of these 32-bit values, but that was the case before too, since the array layouts were different (ie user space doesn't use an array of 32-bit values for individual capability masks, but an array of 32-bit slices of multiple masks). So that marshalling of data is actually simplified too, even if it does remain somewhat obscure and odd. This was all triggered by my reaction to the new "cap_isidentical()" introduced recently. By just using a saner data structure, it went from unsigned __capi; CAP_FOR_EACH_U32(__capi) { if (a.cap[__capi] != b.cap[__capi]) return false; } return true; to just being return a.val == b.val; instead. Which is rather more obvious both to humans and to compilers. Cc: Mateusz Guzik Cc: Casey Schaufler Cc: Serge Hallyn Cc: Al Viro Cc: Paul Moore Signed-off-by: Linus Torvalds --- fs/proc/array.c | 7 +- include/linux/capability.h | 131 +++++---------------- io_uring/fdinfo.c | 4 +- kernel/auditsc.c | 6 +- kernel/capability.c | 104 +++++++--------- kernel/umh.c | 41 +++---- security/apparmor/policy_unpack.c | 40 +++++-- security/commoncap.c | 49 ++++---- .../selftests/bpf/progs/test_deny_namespace.c | 7 +- 9 files changed, 150 insertions(+), 239 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/array.c b/fs/proc/array.c index 49283b8103c7..9b0315d34c58 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) static void render_cap_t(struct seq_file *m, const char *header, kernel_cap_t *a) { - unsigned __capi; - seq_puts(m, header); - CAP_FOR_EACH_U32(__capi) { - seq_put_hex_ll(m, NULL, - a->cap[CAP_LAST_U32 - __capi], 8); - } + seq_put_hex_ll(m, NULL, a->val, 16); seq_putc(m, '\n'); } diff --git a/include/linux/capability.h b/include/linux/capability.h index d3c6c2d1ff45..0c356a517991 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -15,28 +15,25 @@ #include #include +#include #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 -#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 extern int file_caps_enabled; -typedef struct kernel_cap_struct { - __u32 cap[_KERNEL_CAPABILITY_U32S]; -} kernel_cap_t; +typedef struct { u64 val; } kernel_cap_t; /* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; + kuid_t rootid; kernel_cap_t permitted; kernel_cap_t inheritable; - kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) - struct file; struct inode; struct dentry; @@ -44,16 +41,6 @@ struct task_struct; struct user_namespace; struct mnt_idmap; -extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_init_eff_set; - -/* - * Internal kernel functions only - */ - -#define CAP_FOR_EACH_U32(__capi) \ - for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi) - /* * CAP_FS_MASK and CAP_NFSD_MASKS: * @@ -67,104 +54,52 @@ extern const kernel_cap_t __cap_init_eff_set; * 2. The security.* and trusted.* xattrs are fs-related MAC permissions */ -# define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ - | CAP_TO_MASK(CAP_MKNOD) \ - | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ - | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \ - | CAP_TO_MASK(CAP_FOWNER) \ - | CAP_TO_MASK(CAP_FSETID)) - -# define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) - -#if _KERNEL_CAPABILITY_U32S != 2 -# error Fix up hand-coded capability macro initializers -#else /* HAND-CODED capability initializers */ +# define CAP_FS_MASK (BIT_ULL(CAP_CHOWN) \ + | BIT_ULL(CAP_MKNOD) \ + | BIT_ULL(CAP_DAC_OVERRIDE) \ + | BIT_ULL(CAP_DAC_READ_SEARCH) \ + | BIT_ULL(CAP_FOWNER) \ + | BIT_ULL(CAP_FSETID) \ + | BIT_ULL(CAP_MAC_OVERRIDE)) +#define CAP_VALID_MASK (BIT_ULL(CAP_LAST_CAP+1)-1) -#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) -#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) +# define CAP_EMPTY_SET ((kernel_cap_t) { 0 }) +# define CAP_FULL_SET ((kernel_cap_t) { CAP_VALID_MASK }) +# define CAP_FS_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) }) +# define CAP_NFSD_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) }) -# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) -# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ - CAP_FS_MASK_B1 } }) -# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_SYS_RESOURCE), \ - CAP_FS_MASK_B1 } }) +# define cap_clear(c) do { (c).val = 0; } while (0) -#endif /* _KERNEL_CAPABILITY_U32S != 2 */ - -# define cap_clear(c) do { (c) = __cap_empty_set; } while (0) - -#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) -#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) -#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag)) - -#define CAP_BOP_ALL(c, a, b, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \ - } \ -} while (0) - -#define CAP_UOP_ALL(c, a, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = OP a.cap[__capi]; \ - } \ -} while (0) +#define cap_raise(c, flag) ((c).val |= BIT_ULL(flag)) +#define cap_lower(c, flag) ((c).val &= ~BIT_ULL(flag)) +#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0) static inline kernel_cap_t cap_combine(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, |); - return dest; + return (kernel_cap_t) { a.val | b.val }; } static inline kernel_cap_t cap_intersect(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, &); - return dest; + return (kernel_cap_t) { a.val & b.val }; } static inline kernel_cap_t cap_drop(const kernel_cap_t a, const kernel_cap_t drop) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, drop, &~); - return dest; -} - -static inline kernel_cap_t cap_invert(const kernel_cap_t c) -{ - kernel_cap_t dest; - CAP_UOP_ALL(dest, c, ~); - return dest; + return (kernel_cap_t) { a.val &~ drop.val }; } static inline bool cap_isclear(const kernel_cap_t a) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != 0) - return false; - } - return true; + return !a.val; } static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != b.cap[__capi]) - return false; - } - return true; + return a.val == b.val; } /* @@ -176,39 +111,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) */ static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { - kernel_cap_t dest; - dest = cap_drop(a, set); - return cap_isclear(dest); + return !(a.val & ~set.val); } /* Used to decide between falling back on the old suser() or fsuser(). */ static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_FS_SET); } static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_fs_set)); + return cap_combine(a, cap_intersect(permitted, CAP_FS_SET)); } static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_NFSD_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_NFSD_SET); } static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_nfsd_set)); + return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET)); } #ifdef CONFIG_MULTIUSER diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 882bd56b01ed..76c279b13aee 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -22,7 +22,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, struct user_namespace *uns = seq_user_ns(m); struct group_info *gi; kernel_cap_t cap; - unsigned __capi; int g; seq_printf(m, "%5d\n", id); @@ -42,8 +41,7 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, } seq_puts(m, "\n\tCapEff:\t"); cap = cred->cap_effective; - CAP_FOR_EACH_U32(__capi) - seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); + seq_put_hex_ll(m, NULL, cap.val, 16); seq_putc(m, '\n'); return 0; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 93d0b87f3283..addeed3df15d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1295,15 +1295,11 @@ out: static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) { - int i; - if (cap_isclear(*cap)) { audit_log_format(ab, " %s=0", prefix); return; } - audit_log_format(ab, " %s=", prefix); - CAP_FOR_EACH_U32(i) - audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]); + audit_log_format(ab, " %s=%016llx", prefix, cap->val); } static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) diff --git a/kernel/capability.c b/kernel/capability.c index 339a44dfe2f4..3e058f41df32 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -20,13 +20,6 @@ #include #include -/* - * Leveraged for setting/resetting capabilities - */ - -const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; -EXPORT_SYMBOL(__cap_empty_set); - int file_caps_enabled = 1; static int __init file_caps_disable(char *str) @@ -151,6 +144,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) pid_t pid; unsigned tocopy; kernel_cap_t pE, pI, pP; + struct __user_cap_data_struct kdata[2]; ret = cap_validate_magic(header, &tocopy); if ((dataptr == NULL) || (ret != 0)) @@ -163,42 +157,46 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) return -EINVAL; ret = cap_get_target_pid(pid, &pE, &pI, &pP); - if (!ret) { - struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; - unsigned i; - - for (i = 0; i < tocopy; i++) { - kdata[i].effective = pE.cap[i]; - kdata[i].permitted = pP.cap[i]; - kdata[i].inheritable = pI.cap[i]; - } - - /* - * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S, - * we silently drop the upper capabilities here. This - * has the effect of making older libcap - * implementations implicitly drop upper capability - * bits when they perform a: capget/modify/capset - * sequence. - * - * This behavior is considered fail-safe - * behavior. Upgrading the application to a newer - * version of libcap will enable access to the newer - * capabilities. - * - * An alternative would be to return an error here - * (-ERANGE), but that causes legacy applications to - * unexpectedly fail; the capget/modify/capset aborts - * before modification is attempted and the application - * fails. - */ - if (copy_to_user(dataptr, kdata, tocopy - * sizeof(struct __user_cap_data_struct))) { - return -EFAULT; - } - } + if (ret) + return ret; - return ret; + /* + * Annoying legacy format with 64-bit capabilities exposed + * as two sets of 32-bit fields, so we need to split the + * capability values up. + */ + kdata[0].effective = pE.val; kdata[1].effective = pE.val >> 32; + kdata[0].permitted = pP.val; kdata[1].permitted = pP.val >> 32; + kdata[0].inheritable = pI.val; kdata[1].inheritable = pI.val >> 32; + + /* + * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S, + * we silently drop the upper capabilities here. This + * has the effect of making older libcap + * implementations implicitly drop upper capability + * bits when they perform a: capget/modify/capset + * sequence. + * + * This behavior is considered fail-safe + * behavior. Upgrading the application to a newer + * version of libcap will enable access to the newer + * capabilities. + * + * An alternative would be to return an error here + * (-ERANGE), but that causes legacy applications to + * unexpectedly fail; the capget/modify/capset aborts + * before modification is attempted and the application + * fails. + */ + if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0]))) + return -EFAULT; + + return 0; +} + +static kernel_cap_t mk_kernel_cap(u32 low, u32 high) +{ + return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK }; } /** @@ -221,8 +219,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) */ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { - struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; - unsigned i, tocopy, copybytes; + struct __user_cap_data_struct kdata[2] = { { 0, }, }; + unsigned tocopy, copybytes; kernel_cap_t inheritable, permitted, effective; struct cred *new; int ret; @@ -246,21 +244,9 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) if (copy_from_user(&kdata, data, copybytes)) return -EFAULT; - for (i = 0; i < tocopy; i++) { - effective.cap[i] = kdata[i].effective; - permitted.cap[i] = kdata[i].permitted; - inheritable.cap[i] = kdata[i].inheritable; - } - while (i < _KERNEL_CAPABILITY_U32S) { - effective.cap[i] = 0; - permitted.cap[i] = 0; - inheritable.cap[i] = 0; - i++; - } - - effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; - permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; - inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + effective = mk_kernel_cap(kdata[0].effective, kdata[1].effective); + permitted = mk_kernel_cap(kdata[0].permitted, kdata[1].permitted); + inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable); new = prepare_creds(); if (!new) diff --git a/kernel/umh.c b/kernel/umh.c index fbf872c624cb..2a4708277335 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -501,9 +501,9 @@ static int proc_cap_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; - unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; - kernel_cap_t new_cap; - int err, i; + unsigned long cap_array[2]; + kernel_cap_t new_cap, *cap; + int err; if (write && (!capable(CAP_SETPCAP) || !capable(CAP_SYS_MODULE))) @@ -514,14 +514,16 @@ static int proc_cap_handler(struct ctl_table *table, int write, * userspace if this is a read. */ spin_lock(&umh_sysctl_lock); - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { - if (table->data == CAP_BSET) - cap_array[i] = usermodehelper_bset.cap[i]; - else if (table->data == CAP_PI) - cap_array[i] = usermodehelper_inheritable.cap[i]; - else - BUG(); - } + if (table->data == CAP_BSET) + cap = &usermodehelper_bset; + else if (table->data == CAP_PI) + cap = &usermodehelper_inheritable; + else + BUG(); + + /* Legacy format: capabilities are exposed as two 32-bit values */ + cap_array[0] = (u32) cap->val; + cap_array[1] = cap->val >> 32; spin_unlock(&umh_sysctl_lock); t = *table; @@ -535,22 +537,15 @@ static int proc_cap_handler(struct ctl_table *table, int write, if (err < 0) return err; - /* - * convert from the sysctl array of ulongs to the kernel_cap_t - * internal representation - */ - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) - new_cap.cap[i] = cap_array[i]; + new_cap.val = (u32)cap_array[0]; + new_cap.val += (u64)cap_array[1] << 32; /* * Drop everything not in the new_cap (but don't add things) */ if (write) { spin_lock(&umh_sysctl_lock); - if (table->data == CAP_BSET) - usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); - if (table->data == CAP_PI) - usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); + *cap = cap_intersect(*cap, new_cap); spin_unlock(&umh_sysctl_lock); } @@ -561,14 +556,14 @@ struct ctl_table usermodehelper_table[] = { { .procname = "bset", .data = CAP_BSET, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .maxlen = 2 * sizeof(unsigned long), .mode = 0600, .proc_handler = proc_cap_handler, }, { .procname = "inheritable", .data = CAP_PI, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .maxlen = 2 * sizeof(unsigned long), .mode = 0600, .proc_handler = proc_cap_handler, }, diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 5e9949832af6..cf2ceec40b28 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -304,6 +304,26 @@ fail: } EXPORT_SYMBOL_IF_KUNIT(aa_unpack_u64); +static bool aa_unpack_cap_low(struct aa_ext *e, kernel_cap_t *data, const char *name) +{ + u32 val; + + if (!aa_unpack_u32(e, &val, name)) + return false; + data->val = val; + return true; +} + +static bool aa_unpack_cap_high(struct aa_ext *e, kernel_cap_t *data, const char *name) +{ + u32 val; + + if (!aa_unpack_u32(e, &val, name)) + return false; + data->val = (u32)data->val | ((u64)val << 32); + return true; +} + VISIBLE_IF_KUNIT bool aa_unpack_array(struct aa_ext *e, const char *name, u16 *size) { void *pos = e->pos; @@ -897,25 +917,25 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile->path_flags = PATH_MEDIATE_DELETED; info = "failed to unpack profile capabilities"; - if (!aa_unpack_u32(e, &(rules->caps.allow.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.allow, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.audit.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.audit, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.quiet, NULL)) goto fail; - if (!aa_unpack_u32(e, &tmpcap.cap[0], NULL)) + if (!aa_unpack_cap_low(e, &tmpcap, NULL)) goto fail; info = "failed to unpack upper profile capabilities"; if (aa_unpack_nameX(e, AA_STRUCT, "caps64")) { /* optional upper half of 64 bit caps */ - if (!aa_unpack_u32(e, &(rules->caps.allow.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.allow, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.audit.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.audit, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.quiet, NULL)) goto fail; - if (!aa_unpack_u32(e, &(tmpcap.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &tmpcap, NULL)) goto fail; if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; @@ -924,9 +944,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) info = "failed to unpack extended profile capabilities"; if (aa_unpack_nameX(e, AA_STRUCT, "capsx")) { /* optional extended caps mediation mask */ - if (!aa_unpack_u32(e, &(rules->caps.extended.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.extended, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.extended.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.extended, NULL)) goto fail; if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; diff --git a/security/commoncap.c b/security/commoncap.c index aec62db55271..5bb7d1e96277 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -589,7 +589,6 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, bool *has_fcap) { struct cred *new = bprm->cred; - unsigned i; int ret = 0; if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE) @@ -598,22 +597,17 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, if (caps->magic_etc & VFS_CAP_REVISION_MASK) *has_fcap = true; - CAP_FOR_EACH_U32(i) { - __u32 permitted = caps->permitted.cap[i]; - __u32 inheritable = caps->inheritable.cap[i]; - - /* - * pP' = (X & fP) | (pI & fI) - * The addition of pA' is handled later. - */ - new->cap_permitted.cap[i] = - (new->cap_bset.cap[i] & permitted) | - (new->cap_inheritable.cap[i] & inheritable); + /* + * pP' = (X & fP) | (pI & fI) + * The addition of pA' is handled later. + */ + new->cap_permitted.val = + (new->cap_bset.val & caps->permitted.val) | + (new->cap_inheritable.val & caps->inheritable.val); - if (permitted & ~new->cap_permitted.cap[i]) - /* insufficient to execute correctly */ - ret = -EPERM; - } + if (caps->permitted.val & ~new->cap_permitted.val) + /* insufficient to execute correctly */ + ret = -EPERM; /* * For legacy apps, with no internal support for recognizing they @@ -644,7 +638,6 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, { struct inode *inode = d_backing_inode(dentry); __u32 magic_etc; - unsigned tocopy, i; int size; struct vfs_ns_cap_data data, *nscaps = &data; struct vfs_cap_data *caps = (struct vfs_cap_data *) &data; @@ -677,17 +670,14 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) return -EINVAL; - tocopy = VFS_CAP_U32_1; break; case VFS_CAP_REVISION_2: if (size != XATTR_CAPS_SZ_2) return -EINVAL; - tocopy = VFS_CAP_U32_2; break; case VFS_CAP_REVISION_3: if (size != XATTR_CAPS_SZ_3) return -EINVAL; - tocopy = VFS_CAP_U32_3; rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid)); break; @@ -705,15 +695,20 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, if (!rootid_owns_currentns(rootvfsuid)) return -ENODATA; - CAP_FOR_EACH_U32(i) { - if (i >= tocopy) - break; - cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted); - cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable); + cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted); + cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable); + + /* + * Rev1 had just a single 32-bit word, later expanded + * to a second one for the high bits + */ + if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) { + cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32; + cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32; } - cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; - cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + cpu_caps->permitted.val &= CAP_VALID_MASK; + cpu_caps->inheritable.val &= CAP_VALID_MASK; cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid); diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c index 09ad5a4ebd1f..591104e79812 100644 --- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c @@ -6,7 +6,7 @@ #include struct kernel_cap_struct { - __u32 cap[_LINUX_CAPABILITY_U32S_3]; + __u64 val; } __attribute__((preserve_access_index)); struct cred { @@ -19,14 +19,13 @@ SEC("lsm.s/userns_create") int BPF_PROG(test_userns_create, const struct cred *cred, int ret) { struct kernel_cap_struct caps = cred->cap_effective; - int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN); - __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN); + __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); if (ret) return 0; ret = -EPERM; - if (caps.cap[cap_index] & cap_mask) + if (caps.val & cap_mask) return 0; return -EPERM; -- cgit v1.2.3 From 0fb7fb713461e44b12e72c292bf90ee300f40710 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2023 22:07:48 +0100 Subject: genirq/msi, platform-msi: Ensure that MSI descriptors are unreferenced Miquel reported a warning in the MSI core which is triggered when interrupts are freed via platform_msi_device_domain_free(). This code got reworked to use core functions for freeing the MSI descriptors, but nothing took care to clear the msi_desc->irq entry, which then triggers the warning in msi_free_msi_desc() which uses desc->irq to validate that the descriptor has been torn down. The same issue exists in msi_domain_populate_irqs(). Up to the point that msi_free_msi_descs() grew a warning for this case, this went un-noticed. Provide the counterpart of msi_domain_populate_irqs() and invoke it in platform_msi_device_domain_free() before freeing the interrupts and MSI descriptors and also in the error path of msi_domain_populate_irqs(). Fixes: 2f2940d16823 ("genirq/msi: Remove filter from msi_free_descs_free_range()") Reported-by: Miquel Raynal Signed-off-by: Thomas Gleixner Tested-by: Miquel Raynal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87mt4wkwnv.ffs@tglx --- drivers/base/platform-msi.c | 1 + include/linux/msi.h | 2 ++ kernel/irq/msi.c | 23 ++++++++++++++++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 5883e7634a2b..f37ad34c80ec 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -324,6 +324,7 @@ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int vir struct platform_msi_priv_data *data = domain->host_data; msi_lock_descs(data->dev); + msi_domain_depopulate_descs(data->dev, virq, nr_irqs); irq_domain_free_irqs_common(domain, virq, nr_irqs); msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1); msi_unlock_descs(data->dev); diff --git a/include/linux/msi.h b/include/linux/msi.h index a112b913fff9..15dd71817996 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -631,6 +631,8 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *args); int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, int virq, int nvec, msi_alloc_info_t *args); +void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec); + struct irq_domain * __platform_msi_create_device_domain(struct device *dev, unsigned int nvec, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index efd21b79bf32..d169ee0c1799 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1109,14 +1109,35 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, return 0; fail: - for (--virq; virq >= virq_base; virq--) + for (--virq; virq >= virq_base; virq--) { + msi_domain_depopulate_descs(dev, virq, 1); irq_domain_free_irqs_common(domain, virq, 1); + } msi_domain_free_descs(dev, &ctrl); unlock: msi_unlock_descs(dev); return ret; } +void msi_domain_depopulate_descs(struct device *dev, int virq_base, int nvec) +{ + struct msi_ctrl ctrl = { + .domid = MSI_DEFAULT_DOMAIN, + .first = virq_base, + .last = virq_base + nvec - 1, + }; + struct msi_desc *desc; + struct xarray *xa; + unsigned long idx; + + if (!msi_ctrl_valid(dev, &ctrl)) + return; + + xa = &dev->msi.data->__domains[ctrl.domid].store; + xa_for_each_range(xa, idx, desc, ctrl.first, ctrl.last) + desc->irq = 0; +} + /* * Carefully check whether the device can use reservation mode. If * reservation mode is enabled then the early activation will assign a -- cgit v1.2.3 From 95207db8166ab95c42a03fdc5e3abd212c9987dc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Oct 2022 03:23:49 +0900 Subject: Remove Intel compiler support include/linux/compiler-intel.h had no update in the past 3 years. We often forget about the third C compiler to build the kernel. For example, commit a0a12c3ed057 ("asm goto: eradicate CC_HAS_ASM_GOTO") only mentioned GCC and Clang. init/Kconfig defines CC_IS_GCC and CC_IS_CLANG but not CC_IS_ICC, and nobody has reported any issue. I guess the Intel Compiler support is broken, and nobody is caring about it. Harald Arnesen pointed out ICC (classic Intel C/C++ compiler) is deprecated: $ icc -v icc: remark #10441: The Intel(R) C++ Compiler Classic (ICC) is deprecated and will be removed from product release in the second half of 2023. The Intel(R) oneAPI DPC++/C++ Compiler (ICX) is the recommended compiler moving forward. Please transition to use this compiler. Use '-diag-disable=10441' to disable this message. icc version 2021.7.0 (gcc version 12.1.0 compatibility) Arnd Bergmann provided a link to the article, "Intel C/C++ compilers complete adoption of LLVM". lib/zstd/common/compiler.h and lib/zstd/compress/zstd_fast.c were kept untouched for better sync with https://github.com/facebook/zstd Link: https://www.intel.com/content/www/us/en/developer/articles/technical/adoption-of-llvm-complete-icx.html Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- arch/ia64/include/uapi/asm/cmpxchg.h | 6 +- arch/ia64/include/uapi/asm/intel_intrin.h | 162 ------------------------------ arch/ia64/include/uapi/asm/intrinsics.h | 6 +- include/acpi/platform/acenv.h | 5 +- include/acpi/platform/acenvex.h | 2 +- include/acpi/platform/acintel.h | 55 ---------- include/linux/compiler-intel.h | 34 ------- include/linux/compiler_attributes.h | 14 +-- include/linux/compiler_types.h | 2 - scripts/cc-version.sh | 2 - scripts/min-tool-version.sh | 4 - 11 files changed, 5 insertions(+), 287 deletions(-) delete mode 100644 arch/ia64/include/uapi/asm/intel_intrin.h delete mode 100644 include/acpi/platform/acintel.h delete mode 100644 include/linux/compiler-intel.h (limited to 'include/linux') diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h index ca2e02685343..259ae57570bf 100644 --- a/arch/ia64/include/uapi/asm/cmpxchg.h +++ b/arch/ia64/include/uapi/asm/cmpxchg.h @@ -15,11 +15,7 @@ #include /* include compiler specific intrinsics */ #include -#ifdef __INTEL_COMPILER -# include -#else -# include -#endif +#include /* * This function doesn't exist, so you'll get a linker error if diff --git a/arch/ia64/include/uapi/asm/intel_intrin.h b/arch/ia64/include/uapi/asm/intel_intrin.h deleted file mode 100644 index dc1884dc54b5..000000000000 --- a/arch/ia64/include/uapi/asm/intel_intrin.h +++ /dev/null @@ -1,162 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_INTEL_INTRIN_H -#define _ASM_IA64_INTEL_INTRIN_H -/* - * Intel Compiler Intrinsics - * - * Copyright (C) 2002,2003 Jun Nakajima - * Copyright (C) 2002,2003 Suresh Siddha - * Copyright (C) 2005,2006 Hongjiu Lu - * - */ -#include - -#define ia64_barrier() __memory_barrier() - -#define ia64_stop() /* Nothing: As of now stop bit is generated for each - * intrinsic - */ - -#define ia64_getreg __getReg -#define ia64_setreg __setReg - -#define ia64_hint __hint -#define ia64_hint_pause __hint_pause - -#define ia64_mux1_brcst _m64_mux1_brcst -#define ia64_mux1_mix _m64_mux1_mix -#define ia64_mux1_shuf _m64_mux1_shuf -#define ia64_mux1_alt _m64_mux1_alt -#define ia64_mux1_rev _m64_mux1_rev - -#define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v))) -#define ia64_popcnt _m64_popcnt -#define ia64_getf_exp __getf_exp -#define ia64_shrp _m64_shrp - -#define ia64_tpa __tpa -#define ia64_invala __invala -#define ia64_invala_gr __invala_gr -#define ia64_invala_fr __invala_fr -#define ia64_nop __nop -#define ia64_sum __sum -#define ia64_ssm __ssm -#define ia64_rum __rum -#define ia64_rsm __rsm -#define ia64_fc __fc - -#define ia64_ldfs __ldfs -#define ia64_ldfd __ldfd -#define ia64_ldfe __ldfe -#define ia64_ldf8 __ldf8 -#define ia64_ldf_fill __ldf_fill - -#define ia64_stfs __stfs -#define ia64_stfd __stfd -#define ia64_stfe __stfe -#define ia64_stf8 __stf8 -#define ia64_stf_spill __stf_spill - -#define ia64_mf __mf -#define ia64_mfa __mfa - -#define ia64_fetchadd4_acq __fetchadd4_acq -#define ia64_fetchadd4_rel __fetchadd4_rel -#define ia64_fetchadd8_acq __fetchadd8_acq -#define ia64_fetchadd8_rel __fetchadd8_rel - -#define ia64_xchg1 _InterlockedExchange8 -#define ia64_xchg2 _InterlockedExchange16 -#define ia64_xchg4 _InterlockedExchange -#define ia64_xchg8 _InterlockedExchange64 - -#define ia64_cmpxchg1_rel _InterlockedCompareExchange8_rel -#define ia64_cmpxchg1_acq _InterlockedCompareExchange8_acq -#define ia64_cmpxchg2_rel _InterlockedCompareExchange16_rel -#define ia64_cmpxchg2_acq _InterlockedCompareExchange16_acq -#define ia64_cmpxchg4_rel _InterlockedCompareExchange_rel -#define ia64_cmpxchg4_acq _InterlockedCompareExchange_acq -#define ia64_cmpxchg8_rel _InterlockedCompareExchange64_rel -#define ia64_cmpxchg8_acq _InterlockedCompareExchange64_acq - -#define __ia64_set_dbr(index, val) \ - __setIndReg(_IA64_REG_INDR_DBR, index, val) -#define ia64_set_ibr(index, val) \ - __setIndReg(_IA64_REG_INDR_IBR, index, val) -#define ia64_set_pkr(index, val) \ - __setIndReg(_IA64_REG_INDR_PKR, index, val) -#define ia64_set_pmc(index, val) \ - __setIndReg(_IA64_REG_INDR_PMC, index, val) -#define ia64_set_pmd(index, val) \ - __setIndReg(_IA64_REG_INDR_PMD, index, val) -#define ia64_set_rr(index, val) \ - __setIndReg(_IA64_REG_INDR_RR, index, val) - -#define ia64_get_cpuid(index) \ - __getIndReg(_IA64_REG_INDR_CPUID, index) -#define __ia64_get_dbr(index) __getIndReg(_IA64_REG_INDR_DBR, index) -#define ia64_get_ibr(index) __getIndReg(_IA64_REG_INDR_IBR, index) -#define ia64_get_pkr(index) __getIndReg(_IA64_REG_INDR_PKR, index) -#define ia64_get_pmc(index) __getIndReg(_IA64_REG_INDR_PMC, index) -#define ia64_get_pmd(index) __getIndReg(_IA64_REG_INDR_PMD, index) -#define ia64_get_rr(index) __getIndReg(_IA64_REG_INDR_RR, index) - -#define ia64_srlz_d __dsrlz -#define ia64_srlz_i __isrlz - -#define ia64_dv_serialize_data() -#define ia64_dv_serialize_instruction() - -#define ia64_st1_rel __st1_rel -#define ia64_st2_rel __st2_rel -#define ia64_st4_rel __st4_rel -#define ia64_st8_rel __st8_rel - -/* FIXME: need st4.rel.nta intrinsic */ -#define ia64_st4_rel_nta __st4_rel - -#define ia64_ld1_acq __ld1_acq -#define ia64_ld2_acq __ld2_acq -#define ia64_ld4_acq __ld4_acq -#define ia64_ld8_acq __ld8_acq - -#define ia64_sync_i __synci -#define ia64_thash __thash -#define ia64_ttag __ttag -#define ia64_itcd __itcd -#define ia64_itci __itci -#define ia64_itrd __itrd -#define ia64_itri __itri -#define ia64_ptce __ptce -#define ia64_ptcl __ptcl -#define ia64_ptcg __ptcg -#define ia64_ptcga __ptcga -#define ia64_ptri __ptri -#define ia64_ptrd __ptrd -#define ia64_dep_mi _m64_dep_mi - -/* Values for lfhint in __lfetch and __lfetch_fault */ - -#define ia64_lfhint_none __lfhint_none -#define ia64_lfhint_nt1 __lfhint_nt1 -#define ia64_lfhint_nt2 __lfhint_nt2 -#define ia64_lfhint_nta __lfhint_nta - -#define ia64_lfetch __lfetch -#define ia64_lfetch_excl __lfetch_excl -#define ia64_lfetch_fault __lfetch_fault -#define ia64_lfetch_fault_excl __lfetch_fault_excl - -#define ia64_intrin_local_irq_restore(x) \ -do { \ - if ((x) != 0) { \ - ia64_ssm(IA64_PSR_I); \ - ia64_srlz_d(); \ - } else { \ - ia64_rsm(IA64_PSR_I); \ - } \ -} while (0) - -#define __builtin_trap() __break(0); - -#endif /* _ASM_IA64_INTEL_INTRIN_H */ diff --git a/arch/ia64/include/uapi/asm/intrinsics.h b/arch/ia64/include/uapi/asm/intrinsics.h index a0e0a064f5b1..63f27c4ec739 100644 --- a/arch/ia64/include/uapi/asm/intrinsics.h +++ b/arch/ia64/include/uapi/asm/intrinsics.h @@ -14,11 +14,7 @@ #include /* include compiler specific intrinsics */ #include -#ifdef __INTEL_COMPILER -# include -#else -# include -#endif +#include #include #define ia64_set_rr0_to_rr4(val0, val1, val2, val3, val4) \ diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h index 03eb3d977075..9e4f7564201a 100644 --- a/include/acpi/platform/acenv.h +++ b/include/acpi/platform/acenv.h @@ -148,15 +148,12 @@ * *****************************************************************************/ -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#if defined(__GNUC__) #include #elif defined(_MSC_VER) #include "acmsvc.h" -#elif defined(__INTEL_COMPILER) -#include - #endif #if defined(_LINUX) || defined(__linux__) diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h index 3a6b1db9a984..72cc7bab469e 100644 --- a/include/acpi/platform/acenvex.h +++ b/include/acpi/platform/acenvex.h @@ -35,7 +35,7 @@ #endif -#if defined(__GNUC__) && !defined(__INTEL_COMPILER) +#if defined(__GNUC__) #include "acgccex.h" #elif defined(_MSC_VER) diff --git a/include/acpi/platform/acintel.h b/include/acpi/platform/acintel.h deleted file mode 100644 index 85b1ae86ee63..000000000000 --- a/include/acpi/platform/acintel.h +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ -/****************************************************************************** - * - * Name: acintel.h - VC specific defines, etc. - * - * Copyright (C) 2000 - 2022, Intel Corp. - * - *****************************************************************************/ - -#ifndef __ACINTEL_H__ -#define __ACINTEL_H__ - -/* - * Use compiler specific is a good practice for even when - * -nostdinc is specified (i.e., ACPI_USE_STANDARD_HEADERS undefined. - */ -#ifndef va_arg -#include -#endif - -/* Configuration specific to Intel 64-bit C compiler */ - -#define COMPILER_DEPENDENT_INT64 __int64 -#define COMPILER_DEPENDENT_UINT64 unsigned __int64 -#define ACPI_INLINE __inline - -/* - * Calling conventions: - * - * ACPI_SYSTEM_XFACE - Interfaces to host OS (handlers, threads) - * ACPI_EXTERNAL_XFACE - External ACPI interfaces - * ACPI_INTERNAL_XFACE - Internal ACPI interfaces - * ACPI_INTERNAL_VAR_XFACE - Internal variable-parameter list interfaces - */ -#define ACPI_SYSTEM_XFACE -#define ACPI_EXTERNAL_XFACE -#define ACPI_INTERNAL_XFACE -#define ACPI_INTERNAL_VAR_XFACE - -/* remark 981 - operands evaluated in no particular order */ -#pragma warning(disable:981) - -/* warn C4100: unreferenced formal parameter */ -#pragma warning(disable:4100) - -/* warn C4127: conditional expression is constant */ -#pragma warning(disable:4127) - -/* warn C4706: assignment within conditional expression */ -#pragma warning(disable:4706) - -/* warn C4214: bit field types other than int */ -#pragma warning(disable:4214) - -#endif /* __ACINTEL_H__ */ diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h deleted file mode 100644 index b17f3cd18334..000000000000 --- a/include/linux/compiler-intel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include directly, include instead." -#endif - -#ifdef __ECC - -/* Compiler specific definitions for Intel ECC compiler */ - -#include - -/* Intel ECC compiler doesn't support gcc specific asm stmts. - * It uses intrinsics to do the equivalent things. - */ - -#define barrier() __memory_barrier() -#define barrier_data(ptr) barrier() - -#define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __ptr = (unsigned long) (ptr); \ - (typeof(ptr)) (__ptr + (off)); }) - -/* This should act as an optimization barrier on var. - * Given that this compiler does not have inline assembly, a compiler barrier - * is the best we can do. - */ -#define OPTIMIZER_HIDE_VAR(var) barrier() - -#endif - -/* icc has this, but it's called _bswap16 */ -#define __HAVE_BUILTIN_BSWAP16__ -#define __builtin_bswap16 _bswap16 diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 4a3bd114a24f..e659cb6fded3 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -64,16 +64,10 @@ * compiler should see some alignment anyway, when the return value is * massaged by 'flags = ptr & 3; ptr &= ~3;'). * - * Optional: not supported by icc - * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned */ -#if __has_attribute(__assume_aligned__) -# define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) -#else -# define __assume_aligned(a, ...) -#endif +#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) /* * Note the long name. @@ -85,7 +79,6 @@ /* * Optional: only supported since gcc >= 9 * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-copy-function-attribute */ @@ -98,7 +91,6 @@ /* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 - * Optional: not supported by icc * * clang: https://clang.llvm.org/docs/AttributeReference.html#diagnose_as_builtin */ @@ -122,7 +114,6 @@ /* * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute */ @@ -236,7 +227,6 @@ /* * Optional: only supported since gcc >= 8 * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute */ @@ -267,7 +257,6 @@ /* * Optional: not supported by gcc. - * Optional: not supported by icc. * * clang: https://clang.llvm.org/docs/AttributeReference.html#overloadable */ @@ -287,7 +276,6 @@ * Note: the "type" argument should match any __builtin_object_size(p, type) usage. * * Optional: not supported by gcc. - * Optional: not supported by icc. * * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 690c7c826fbf..547ea1ff806e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -120,8 +120,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* Compiler specific macros. */ #ifdef __clang__ #include -#elif defined(__INTEL_COMPILER) -#include #elif defined(__GNUC__) /* The above compilers also define __GNUC__, so order is important here. */ #include diff --git a/scripts/cc-version.sh b/scripts/cc-version.sh index 2401c86fcf53..0573c92e841d 100755 --- a/scripts/cc-version.sh +++ b/scripts/cc-version.sh @@ -12,8 +12,6 @@ get_c_compiler_info() cat <<- EOF | "$@" -E -P -x c - 2>/dev/null #if defined(__clang__) Clang __clang_major__ __clang_minor__ __clang_patchlevel__ - #elif defined(__INTEL_COMPILER) - ICC __INTEL_COMPILER __INTEL_COMPILER_UPDATE #elif defined(__GNUC__) GCC __GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__ #else diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index a814f1efb39d..20d483ec6f5f 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -19,10 +19,6 @@ binutils) gcc) echo 5.1.0 ;; -icc) - # temporary - echo 16.0.3 - ;; llvm) if [ "$SRCARCH" = s390 ]; then echo 15.0.0 -- cgit v1.2.3 From 596ff4a09b8981790e15572e8e7bc904df5835e7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Mar 2023 13:35:43 -0800 Subject: cpumask: re-introduce constant-sized cpumask optimizations Commit aa47a7c215e7 ("lib/cpumask: deprecate nr_cpumask_bits") resulted in the cpumask operations potentially becoming hugely less efficient, because suddenly the cpumask was always considered to be variable-sized. The optimization was then later added back in a limited form by commit 6f9c07be9d02 ("lib/cpumask: add FORCE_NR_CPUS config option"), but that FORCE_NR_CPUS option is not useful in a generic kernel and more of a special case for embedded situations with fixed hardware. Instead, just re-introduce the optimization, with some changes. Instead of depending on CPUMASK_OFFSTACK being false, and then always using the full constant cpumask width, this introduces three different cpumask "sizes": - the exact size (nr_cpumask_bits) remains identical to nr_cpu_ids. This is used for situations where we should use the exact size. - the "small" size (small_cpumask_bits) is the NR_CPUS constant if it fits in a single word and the bitmap operations thus end up able to trigger the "small_const_nbits()" optimizations. This is used for the operations that have optimized single-word cases that get inlined, notably the bit find and scanning functions. - the "large" size (large_cpumask_bits) is the NR_CPUS constant if it is an sufficiently small constant that makes simple "copy" and "clear" operations more efficient. This is arbitrarily set at four words or less. As a an example of this situation, without this fixed size optimization, cpumask_clear() will generate code like movl nr_cpu_ids(%rip), %edx addq $63, %rdx shrq $3, %rdx andl $-8, %edx callq memset@PLT on x86-64, because it would calculate the "exact" number of longwords that need to be cleared. In contrast, with this patch, using a MAX_CPU of 64 (which is quite a reasonable value to use), the above becomes a single movq $0,cpumask instruction instead, because instead of caring to figure out exactly how many CPU's the system has, it just knows that the cpumask will be a single word and can just clear it all. Note that this does end up tightening the rules a bit from the original version in another way: operations that set bits in the cpumask are now limited to the actual nr_cpu_ids limit, whereas we used to do the nr_cpumask_bits thing almost everywhere in the cpumask code. But if you just clear bits, or scan for bits, we can use the simpler compile-time constants. In the process, remove 'cpumask_complement()' and 'for_each_cpu_not()' which were not useful, and which fundamentally have to be limited to 'nr_cpu_ids'. Better remove them now than have somebody introduce use of them later. Of course, on x86-64 with MAXSMP there is no sane small compile-time constant for the cpumask sizes, and we end up using the actual CPU bits, and will generate the above kind of horrors regardless. Please don't use MAXSMP unless you really expect to have machines with thousands of cores. Signed-off-by: Linus Torvalds --- .clang-format | 1 - arch/ia64/kernel/acpi.c | 4 +- include/linux/cpumask.h | 125 +++++++++++++++++++++++++++--------------------- lib/cpumask_kunit.c | 14 +----- 4 files changed, 72 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/.clang-format b/.clang-format index 2c61b4553374..d988e9fa9b26 100644 --- a/.clang-format +++ b/.clang-format @@ -226,7 +226,6 @@ ForEachMacros: - 'for_each_console_srcu' - 'for_each_cpu' - 'for_each_cpu_and' - - 'for_each_cpu_not' - 'for_each_cpu_wrap' - 'for_each_dapm_widgets' - 'for_each_dedup_cand' diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 96d13cb7c19f..15f6cfddcc08 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -783,11 +783,9 @@ __init void prefill_possible_map(void) static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) { - cpumask_t tmp_map; int cpu; - cpumask_complement(&tmp_map, cpu_present_mask); - cpu = cpumask_first(&tmp_map); + cpu = cpumask_first_zero(cpu_present_mask); if (cpu >= nr_cpu_ids) return -EINVAL; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 10c92bd9b807..8fbe76607965 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -50,8 +50,41 @@ static inline void set_nr_cpu_ids(unsigned int nr) #endif } -/* Deprecated. Always use nr_cpu_ids. */ -#define nr_cpumask_bits nr_cpu_ids +/* + * We have several different "preferred sizes" for the cpumask + * operations, depending on operation. + * + * For example, the bitmap scanning and operating operations have + * optimized routines that work for the single-word case, but only when + * the size is constant. So if NR_CPUS fits in one single word, we are + * better off using that small constant, in order to trigger the + * optimized bit finding. That is 'small_cpumask_size'. + * + * The clearing and copying operations will similarly perform better + * with a constant size, but we limit that size arbitrarily to four + * words. We call this 'large_cpumask_size'. + * + * Finally, some operations just want the exact limit, either because + * they set bits or just don't have any faster fixed-sized versions. We + * call this just 'nr_cpumask_size'. + * + * Note that these optional constants are always guaranteed to be at + * least as big as 'nr_cpu_ids' itself is, and all our cpumask + * allocations are at least that size (see cpumask_size()). The + * optimization comes from being able to potentially use a compile-time + * constant instead of a run-time generated exact number of CPUs. + */ +#if NR_CPUS <= BITS_PER_LONG + #define small_cpumask_bits ((unsigned int)NR_CPUS) + #define large_cpumask_bits ((unsigned int)NR_CPUS) +#elif NR_CPUS <= 4*BITS_PER_LONG + #define small_cpumask_bits nr_cpu_ids + #define large_cpumask_bits ((unsigned int)NR_CPUS) +#else + #define small_cpumask_bits nr_cpu_ids + #define large_cpumask_bits nr_cpu_ids +#endif +#define nr_cpumask_bits nr_cpu_ids /* * The following particular system cpumasks and operations manage @@ -126,7 +159,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { - return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -137,7 +170,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { - return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -150,7 +183,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { - return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); + return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** @@ -161,7 +194,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { - return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -177,7 +210,7 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); - return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); + return find_next_bit(cpumask_bits(srcp), small_cpumask_bits, n + 1); } /** @@ -192,7 +225,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); - return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); + return find_next_zero_bit(cpumask_bits(srcp), small_cpumask_bits, n+1); } #if NR_CPUS == 1 @@ -235,7 +268,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, if (n != -1) cpumask_check(n); return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits, n + 1); + small_cpumask_bits, n + 1); } /** @@ -246,17 +279,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ - for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) - -/** - * for_each_cpu_not - iterate over every cpu in a complemented mask - * @cpu: the (optionally unsigned) integer iterator - * @mask: the cpumask pointer - * - * After the loop, cpu is >= nr_cpu_ids. - */ -#define for_each_cpu_not(cpu, mask) \ - for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) + for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 static inline @@ -290,7 +313,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_wrap(cpu, mask, start) \ - for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start) + for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits, start) /** * for_each_cpu_and - iterate over every cpu in both masks @@ -307,7 +330,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ - for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding @@ -325,7 +348,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_andnot(cpu, mask1, mask2) \ - for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * cpumask_any_but - return a "random" in a cpumask, but not this one. @@ -356,7 +379,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { - return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu)); + return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } /** @@ -372,7 +395,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } /** @@ -388,7 +411,7 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } /** @@ -408,7 +431,7 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp return find_nth_and_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), cpumask_bits(srcp3), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } #define CPU_BITS_NONE \ @@ -495,10 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer + * + * Note: since we set bits, we should use the tighter 'bitmap_set()' with + * the eact number of bits, not 'bitmap_fill()' that will fill past the + * end. */ static inline void cpumask_setall(struct cpumask *dstp) { - bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); + bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits); } /** @@ -507,7 +534,7 @@ static inline void cpumask_setall(struct cpumask *dstp) */ static inline void cpumask_clear(struct cpumask *dstp) { - bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); + bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } /** @@ -523,7 +550,7 @@ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -536,7 +563,7 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -550,7 +577,7 @@ static inline void cpumask_xor(struct cpumask *dstp, const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -566,19 +593,7 @@ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); -} - -/** - * cpumask_complement - *dstp = ~*srcp - * @dstp: the cpumask result - * @srcp: the input to invert - */ -static inline void cpumask_complement(struct cpumask *dstp, - const struct cpumask *srcp) -{ - bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), - nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -590,7 +605,7 @@ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -604,7 +619,7 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), - cpumask_bits(src3p), nr_cpumask_bits); + cpumask_bits(src3p), small_cpumask_bits); } /** @@ -616,7 +631,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -630,7 +645,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -639,7 +654,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, */ static inline bool cpumask_empty(const struct cpumask *srcp) { - return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -657,7 +672,7 @@ static inline bool cpumask_full(const struct cpumask *srcp) */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { - return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -668,7 +683,7 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { - return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); + return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** @@ -681,7 +696,7 @@ static inline void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -705,7 +720,7 @@ static inline void cpumask_shift_left(struct cpumask *dstp, static inline void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { - bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); + bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } /** @@ -789,7 +804,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) */ static inline unsigned int cpumask_size(void) { - return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); + return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long); } /* diff --git a/lib/cpumask_kunit.c b/lib/cpumask_kunit.c index d1fc6ece21f3..a105e6369efc 100644 --- a/lib/cpumask_kunit.c +++ b/lib/cpumask_kunit.c @@ -23,16 +23,6 @@ KUNIT_EXPECT_EQ_MSG((test), mask_weight, iter, MASK_MSG(mask)); \ } while (0) -#define EXPECT_FOR_EACH_CPU_NOT_EQ(test, mask) \ - do { \ - const cpumask_t *m = (mask); \ - int mask_weight = cpumask_weight(m); \ - int cpu, iter = 0; \ - for_each_cpu_not(cpu, m) \ - iter++; \ - KUNIT_EXPECT_EQ_MSG((test), nr_cpu_ids - mask_weight, iter, MASK_MSG(mask)); \ - } while (0) - #define EXPECT_FOR_EACH_CPU_OP_EQ(test, op, mask1, mask2) \ do { \ const cpumask_t *m1 = (mask1); \ @@ -77,7 +67,7 @@ static void test_cpumask_weight(struct kunit *test) KUNIT_EXPECT_EQ_MSG(test, 0, cpumask_weight(&mask_empty), MASK_MSG(&mask_empty)); KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(cpu_possible_mask), MASK_MSG(cpu_possible_mask)); - KUNIT_EXPECT_EQ_MSG(test, nr_cpumask_bits, cpumask_weight(&mask_all), MASK_MSG(&mask_all)); + KUNIT_EXPECT_EQ_MSG(test, nr_cpu_ids, cpumask_weight(&mask_all), MASK_MSG(&mask_all)); } static void test_cpumask_first(struct kunit *test) @@ -113,14 +103,12 @@ static void test_cpumask_next(struct kunit *test) static void test_cpumask_iterators(struct kunit *test) { EXPECT_FOR_EACH_CPU_EQ(test, &mask_empty); - EXPECT_FOR_EACH_CPU_NOT_EQ(test, &mask_empty); EXPECT_FOR_EACH_CPU_WRAP_EQ(test, &mask_empty); EXPECT_FOR_EACH_CPU_OP_EQ(test, and, &mask_empty, &mask_empty); EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, &mask_empty); EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, &mask_empty, &mask_empty); EXPECT_FOR_EACH_CPU_EQ(test, cpu_possible_mask); - EXPECT_FOR_EACH_CPU_NOT_EQ(test, cpu_possible_mask); EXPECT_FOR_EACH_CPU_WRAP_EQ(test, cpu_possible_mask); EXPECT_FOR_EACH_CPU_OP_EQ(test, and, cpu_possible_mask, cpu_possible_mask); EXPECT_FOR_EACH_CPU_OP_EQ(test, andnot, cpu_possible_mask, &mask_empty); -- cgit v1.2.3 From 80c16b2b121fbc3380dbffa9bab7559acbaaa2ed Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2023 17:22:04 +0200 Subject: cpumask: Fix typo nr_cpumask_size --> nr_cpumask_bits The never used nr_cpumask_size is just a typo, hence use existing redefinition that's called nr_cpumask_bits. Signed-off-by: Andy Shevchenko Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fbe76607965..ce8eb7ef2107 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -66,7 +66,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * Finally, some operations just want the exact limit, either because * they set bits or just don't have any faster fixed-sized versions. We - * call this just 'nr_cpumask_size'. + * call this just 'nr_cpumask_bits'. * * Note that these optional constants are always guaranteed to be at * least as big as 'nr_cpu_ids' itself is, and all our cpumask -- cgit v1.2.3 From 63355b9884b3d1677de6bd1517cd2b8a9bf53978 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Mar 2023 12:16:18 -0800 Subject: cpumask: be more careful with 'cpumask_setall()' Commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask optimizations") changed cpumask_setall() to use "bitmap_set()" instead of "bitmap_fill()", because bitmap_fill() would explicitly set all the bits of a constant sized small bitmap, and that's exactly what we don't want: we want to only set bits up to 'nr_cpu_ids', which is what "bitmap_set()" does. However, Yury correctly points out that while "bitmap_set()" does indeed only set bits up to the required bitmap size, it doesn't _clear_ bits above that size, so the upper bits would still not have well-defined values. Now, none of this should really matter, since any bits set past 'nr_cpu_ids' should always be ignored in the first place. Yes, the bit scanning functions might return them as a result, but since users should always consider the ">= nr_cpu_ids" condition to mean "no more bits", that shouldn't have any actual effect (see previous commit 8ca09d5fa354 "cpumask: fix incorrect cpumask scanning result checks"). But let's just do it right, the way the code was _intended_ to work. We have had enough lazy code that works but bites us in the *rse later (again, see previous commit) that there's no reason to not just do this properly. It turns out that "bitmap_fill()" gets this all right for the complex case, and really only fails for the inlined optimized case that just fills the whole word. And while we could just fix bitmap_fill() to use the proper last word mask, there's two issues with that: - the cpumask case wants to do the _optimization_ based on "NR_CPUS is a small constant", but then wants to do the actual bit _fill_ based on "nr_cpu_ids" that isn't necessarily that same constant - we have lots of non-cpumask users of bitmap_fill(), and while they hopefully don't care, and probably would want the proper semantics anyway ("only set bits up to the limit"), I do not want the cpumask changes to impact other parts So this ends up just doing the single-word optimization by hand in the cpumask code. If our cpumask is fundamentally limited to a single word, just do the proper "fill in that word" exactly. And if it's the more complex multi-word case, then the generic bitmap_fill() will DTRT. This is all an example of how our bitmap function optimizations really are somewhat broken. They conflate the "this is size of the bitmap" optimizations with the actual bit(s) we want to set. In many cases we really want to have the two be separate things: sometimes we base our optimizations on the size of the whole bitmap ("I know this whole bitmap fits in a single word, so I'll just use single-word accesses"), and sometimes we base them on the bit we are looking at ("this is just acting on bits that are in the first word, so I'll use single-word accesses"). Notice how the end result of the two optimizations are the same, but the way we get to them are quite different. And all our cpumask optimization games are really about that fundamental distinction, and we'd often really want to pass in both the "this is the bit I'm working on" (which _can_ be a small constant but might be variable), and "I know it's in this range even if it's variable" (based on CONFIG_NR_CPUS). So this cpumask_setall() implementation just makes that explicit. It checks the "I statically know the size is small" using the known static size of the cpumask (which is what that 'small_cpumask_bits' is all about), but then sets the actual bits using the exact number of cpus we have (ie 'nr_cpumask_bits') Of course, in a perfect world, the compiler would have done all the range analysis (possibly with help from us just telling it that "this value is always in this range"), and would do all of this for us. But that is not the world we live in. While we dream of that perfect world, this does that manual logic to make it all work out. And this was a very long explanation for a small code change that shouldn't even matter. Reported-by: Yury Norov Link: https://lore.kernel.org/lkml/ZAV9nGG9e1%2FrV+L%2F@yury-laptop/ Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ce8eb7ef2107..63d637d18e79 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -518,14 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer - * - * Note: since we set bits, we should use the tighter 'bitmap_set()' with - * the eact number of bits, not 'bitmap_fill()' that will fill past the - * end. */ static inline void cpumask_setall(struct cpumask *dstp) { - bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits); + if (small_const_nbits(small_cpumask_bits)) { + cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); + return; + } + bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); } /** -- cgit v1.2.3