Diffstat (limited to 'include/linux')
262 files changed, 5085 insertions, 2439 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3015235d65e3..5e6a876e17ba 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -586,6 +586,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000 #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 #define OSC_SB_PRM_SUPPORT 0x00200000 +#define OSC_SB_FFH_OPR_SUPPORT 0x00400000 extern bool osc_sb_apei_support_acked; extern bool osc_pc_lpi_support_confirmed; @@ -1136,6 +1137,7 @@ int acpi_subsys_freeze(struct device *dev); int acpi_subsys_poweroff(struct device *dev); void acpi_ec_mark_gpe_for_wake(void); void acpi_ec_set_gpe_wake_mask(u8 action); +int acpi_subsys_restore_early(struct device *dev); #else static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} @@ -1144,6 +1146,7 @@ static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } static inline int acpi_subsys_poweroff(struct device *dev) { return 0; } +static inline int acpi_subsys_restore_early(struct device *dev) { return 0; } static inline void acpi_ec_mark_gpe_for_wake(void) {} static inline void acpi_ec_set_gpe_wake_mask(u8 action) {} #endif @@ -1488,6 +1491,16 @@ void acpi_init_pcc(void); static inline void acpi_init_pcc(void) { } #endif +#ifdef CONFIG_ACPI_FFH +void acpi_init_ffh(void); +extern int acpi_ffh_address_space_arch_setup(void *handler_ctxt, + void **region_ctxt); +extern int acpi_ffh_address_space_arch_handler(acpi_integer *value, + void *region_context); +#else +static inline void acpi_init_ffh(void) { } +#endif + #ifdef CONFIG_ACPI extern void acpi_device_notify(struct device *dev); extern void acpi_device_notify_remove(struct device *dev); diff --git a/include/linux/acpi_apmt.h b/include/linux/acpi_apmt.h new file mode 100644 index 000000000000..40bd634d082f --- /dev/null +++ b/include/linux/acpi_apmt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * ARM CoreSight PMU driver. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. 
+ * + */ + +#ifndef __ACPI_APMT_H__ +#define __ACPI_APMT_H__ + +#include <linux/acpi.h> + +#ifdef CONFIG_ACPI_APMT +void acpi_apmt_init(void); +#else +static inline void acpi_apmt_init(void) { } +#endif /* CONFIG_ACPI_APMT */ + +#endif /* __ACPI_APMT_H__ */ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 5f02d2e6b9d9..c87aeecaa9b2 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -11,6 +11,89 @@ #include <linux/types.h> #include <linux/uuid.h> +#define FFA_SMC(calling_convention, func_num) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, (calling_convention), \ + ARM_SMCCC_OWNER_STANDARD, (func_num)) + +#define FFA_SMC_32(func_num) FFA_SMC(ARM_SMCCC_SMC_32, (func_num)) +#define FFA_SMC_64(func_num) FFA_SMC(ARM_SMCCC_SMC_64, (func_num)) + +#define FFA_ERROR FFA_SMC_32(0x60) +#define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_INTERRUPT FFA_SMC_32(0x62) +#define FFA_VERSION FFA_SMC_32(0x63) +#define FFA_FEATURES FFA_SMC_32(0x64) +#define FFA_RX_RELEASE FFA_SMC_32(0x65) +#define FFA_RXTX_MAP FFA_SMC_32(0x66) +#define FFA_FN64_RXTX_MAP FFA_SMC_64(0x66) +#define FFA_RXTX_UNMAP FFA_SMC_32(0x67) +#define FFA_PARTITION_INFO_GET FFA_SMC_32(0x68) +#define FFA_ID_GET FFA_SMC_32(0x69) +#define FFA_MSG_POLL FFA_SMC_32(0x6A) +#define FFA_MSG_WAIT FFA_SMC_32(0x6B) +#define FFA_YIELD FFA_SMC_32(0x6C) +#define FFA_RUN FFA_SMC_32(0x6D) +#define FFA_MSG_SEND FFA_SMC_32(0x6E) +#define FFA_MSG_SEND_DIRECT_REQ FFA_SMC_32(0x6F) +#define FFA_FN64_MSG_SEND_DIRECT_REQ FFA_SMC_64(0x6F) +#define FFA_MSG_SEND_DIRECT_RESP FFA_SMC_32(0x70) +#define FFA_FN64_MSG_SEND_DIRECT_RESP FFA_SMC_64(0x70) +#define FFA_MEM_DONATE FFA_SMC_32(0x71) +#define FFA_FN64_MEM_DONATE FFA_SMC_64(0x71) +#define FFA_MEM_LEND FFA_SMC_32(0x72) +#define FFA_FN64_MEM_LEND FFA_SMC_64(0x72) +#define FFA_MEM_SHARE FFA_SMC_32(0x73) +#define FFA_FN64_MEM_SHARE FFA_SMC_64(0x73) +#define FFA_MEM_RETRIEVE_REQ FFA_SMC_32(0x74) +#define FFA_FN64_MEM_RETRIEVE_REQ FFA_SMC_64(0x74) +#define FFA_MEM_RETRIEVE_RESP FFA_SMC_32(0x75) +#define FFA_MEM_RELINQUISH FFA_SMC_32(0x76) +#define FFA_MEM_RECLAIM FFA_SMC_32(0x77) +#define FFA_MEM_OP_PAUSE FFA_SMC_32(0x78) +#define FFA_MEM_OP_RESUME FFA_SMC_32(0x79) +#define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) +#define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) +#define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) + +/* + * For some calls it is necessary to use SMC64 to pass or return 64-bit values. + * For such calls FFA_FN_NATIVE(name) will choose the appropriate + * (native-width) function ID. + */ +#ifdef CONFIG_64BIT +#define FFA_FN_NATIVE(name) FFA_FN64_##name +#else +#define FFA_FN_NATIVE(name) FFA_##name +#endif + +/* FFA error codes. */ +#define FFA_RET_SUCCESS (0) +#define FFA_RET_NOT_SUPPORTED (-1) +#define FFA_RET_INVALID_PARAMETERS (-2) +#define FFA_RET_NO_MEMORY (-3) +#define FFA_RET_BUSY (-4) +#define FFA_RET_INTERRUPTED (-5) +#define FFA_RET_DENIED (-6) +#define FFA_RET_RETRY (-7) +#define FFA_RET_ABORTED (-8) + +/* FFA version encoding */ +#define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) +#define FFA_MINOR_VERSION_MASK GENMASK(15, 0) +#define FFA_MAJOR_VERSION(x) ((u16)(FIELD_GET(FFA_MAJOR_VERSION_MASK, (x)))) +#define FFA_MINOR_VERSION(x) ((u16)(FIELD_GET(FFA_MINOR_VERSION_MASK, (x)))) +#define FFA_PACK_VERSION_INFO(major, minor) \ + (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ + FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) +#define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) + +/** + * FF-A specification mentions explicitly about '4K pages'. 
This should + * not be confused with the kernel PAGE_SIZE, which is the translation + * granule kernel is configured and may be one among 4K, 16K and 64K. + */ +#define FFA_PAGE_SIZE SZ_4K + /* FFA Bus/Device/Driver related */ struct ffa_device { int vm_id; @@ -161,11 +244,11 @@ struct ffa_mem_region_attributes { */ #define FFA_MEM_RETRIEVE_SELF_BORROWER BIT(0) u8 flag; - u32 composite_off; /* * Offset in bytes from the start of the outer `ffa_memory_region` to * an `struct ffa_mem_region_addr_range`. */ + u32 composite_off; u64 reserved; }; diff --git a/include/linux/ata.h b/include/linux/ata.h index e3050e153a71..0c18499f60b6 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -615,15 +615,6 @@ static inline bool ata_id_has_flush(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 12); } -static inline bool ata_id_flush_enabled(const u16 *id) -{ - if (ata_id_has_flush(id) == 0) - return false; - if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) - return false; - return id[ATA_ID_CFS_ENABLE_2] & (1 << 12); -} - static inline bool ata_id_has_flush_ext(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) @@ -631,19 +622,6 @@ static inline bool ata_id_has_flush_ext(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 13); } -static inline bool ata_id_flush_ext_enabled(const u16 *id) -{ - if (ata_id_has_flush_ext(id) == 0) - return false; - if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) - return false; - /* - * some Maxtor disks have bit 13 defined incorrectly - * so check bit 10 too - */ - return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; -} - static inline u32 ata_id_logical_sector_size(const u16 *id) { /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. @@ -698,15 +676,6 @@ static inline bool ata_id_has_lba48(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 10); } -static inline bool ata_id_lba48_enabled(const u16 *id) -{ - if (ata_id_has_lba48(id) == 0) - return false; - if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) - return false; - return id[ATA_ID_CFS_ENABLE_2] & (1 << 10); -} - static inline bool ata_id_hpa_enabled(const u16 *id) { /* Yes children, word 83 valid bits cover word 82 data */ diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2ce27e8e4f19..d91af50ac58d 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -136,7 +136,8 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_CHANNELS = 31, VIRTCHNL_OP_ADD_CLOUD_FILTER = 32, VIRTCHNL_OP_DEL_CLOUD_FILTER = 33, - /* opcode 34 - 44 are reserved */ + /* opcode 34 - 43 are reserved */ + VIRTCHNL_OP_GET_SUPPORTED_RXDIDS = 44, VIRTCHNL_OP_ADD_RSS_CFG = 45, VIRTCHNL_OP_DEL_RSS_CFG = 46, VIRTCHNL_OP_ADD_FDIR_FILTER = 47, @@ -263,6 +264,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM BIT(22) #define VIRTCHNL_VF_OFFLOAD_ADQ BIT(23) #define VIRTCHNL_VF_OFFLOAD_USO BIT(25) +#define VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC BIT(26) #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) @@ -318,7 +320,9 @@ struct virtchnl_rxq_info { u16 splithdr_enabled; /* deprecated with AVF 1.0 */ u32 databuffer_size; u32 max_pkt_size; - u32 pad1; + u8 pad0; + u8 rxdid; + u8 pad1[2]; u64 dma_ring_addr; enum virtchnl_rx_hsplit rx_split_pos; /* deprecated with AVF 1.0 */ u32 pad2; @@ -970,6 +974,10 @@ struct virtchnl_filter { VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); +struct virtchnl_supported_rxdids { + u64 supported_rxdids; +}; + /* VIRTCHNL_OP_EVENT * PF sends this message to inform 
the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other @@ -1499,6 +1507,8 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_DEL_CLOUD_FILTER: valid_len = sizeof(struct virtchnl_filter); break; + case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS: + break; case VIRTCHNL_OP_ADD_RSS_CFG: case VIRTCHNL_OP_DEL_RSS_CFG: valid_len = sizeof(struct virtchnl_rss_cfg); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 439815cc1ab9..fbad4fcd408e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -102,8 +102,18 @@ static inline unsigned long wb_stat_error(void) #endif } +/* BDI ratio is expressed as part per 1000000 for finer granularity. */ +#define BDI_RATIO_SCALE 10000 + +u64 bdi_get_min_bytes(struct backing_dev_info *bdi); +u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h index 53b31f69b74a..7615f8d7b1ed 100644 --- a/include/linux/bcm47xx_nvram.h +++ b/include/linux/bcm47xx_nvram.h @@ -11,6 +11,7 @@ #include <linux/vmalloc.h> #ifdef CONFIG_BCM47XX_NVRAM +int bcm47xx_nvram_init_from_iomem(void __iomem *nvram_start, size_t res_size); int bcm47xx_nvram_init_from_mem(u32 base, u32 lim); int bcm47xx_nvram_getenv(const char *name, char *val, size_t val_len); int bcm47xx_nvram_gpio_pin(const char *name); @@ -20,6 +21,11 @@ static inline void bcm47xx_nvram_release_contents(char *nvram) vfree(nvram); }; #else +static inline int bcm47xx_nvram_init_from_iomem(void __iomem *nvram_start, + size_t res_size) +{ + return -ENOTSUPP; +} static inline int bcm47xx_nvram_init_from_mem(u32 base, u32 lim) { return -ENOTSUPP; diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 2d94c30ed439..0cb6638b55e5 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -4,7 +4,7 @@ #include <linux/platform_device.h> #include <linux/platform_data/brcmnand.h> -#include <linux/gpio.h> +#include <linux/gpio/driver.h> /** ChipCommon core registers. 
**/ #define BCMA_CC_ID 0x0000 diff --git a/include/linux/bio.h b/include/linux/bio.h index 2c5806997bbf..b231a665682a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -475,8 +475,6 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, - struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index bbab65bd5428..e6802b69cdd6 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -138,18 +138,6 @@ int devm_blk_crypto_profile_init(struct device *dev, unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot); -blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, - const struct blk_crypto_key *key, - struct blk_crypto_keyslot **slot_ptr); - -void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot); - -bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, - const struct blk_crypto_config *cfg); - -int __blk_crypto_evict_key(struct blk_crypto_profile *profile, - const struct blk_crypto_key *key); - void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 69b24fe92cbf..1e3e5d0adf12 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -13,6 +13,7 @@ enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_AES_256_XTS, BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, BLK_ENCRYPTION_MODE_ADIANTUM, + BLK_ENCRYPTION_MODE_SM4_XTS, BLK_ENCRYPTION_MODE_MAX, }; @@ -71,9 +72,6 @@ struct bio_crypt_ctx { #include <linux/blk_types.h> #include <linux/blkdev.h> -struct request; -struct request_queue; - #ifdef CONFIG_BLK_INLINE_ENCRYPTION static inline bool bio_has_crypt_ctx(struct bio *bio) @@ -94,13 +92,15 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int dun_bytes, unsigned int data_unit_size); -int blk_crypto_start_using_key(const struct blk_crypto_key *key, - struct request_queue *q); +int blk_crypto_start_using_key(struct block_device *bdev, + const struct blk_crypto_key *key); -int blk_crypto_evict_key(struct request_queue *q, +int blk_crypto_evict_key(struct block_device *bdev, const struct blk_crypto_key *key); -bool blk_crypto_config_supported(struct request_queue *q, +bool blk_crypto_config_supported_natively(struct block_device *bdev, + const struct blk_crypto_config *cfg); +bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d6119c5d1069..779fba613bd0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -7,6 +7,7 @@ #include <linux/lockdep.h> #include <linux/scatterlist.h> #include <linux/prefetch.h> +#include <linux/srcu.h> struct blk_mq_tags; struct blk_flush_queue; @@ -140,7 +141,6 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif - unsigned short write_hint; unsigned short ioprio; enum mq_rq_state state; @@ -501,6 +501,8 @@ enum hctx_type { * @tag_list_lock: Serializes tag_list accesses. 
* @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. + * @srcu: Use as lock when type of the request queue is blocking + * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { struct blk_mq_queue_map map[HCTX_MAX_TYPES]; @@ -521,6 +523,7 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; + struct srcu_struct *srcu; }; /** @@ -878,7 +881,9 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); -void blk_mq_wait_quiesce_done(struct request_queue *q); +void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set); +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e0b098089ef2..99be590f952f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -472,13 +472,6 @@ static inline enum req_op bio_op(const struct bio *bio) return bio->bi_opf & REQ_OP_MASK; } -/* obsolete, don't use in new code */ -static inline void bio_set_op_attrs(struct bio *bio, enum req_op op, - blk_opf_t op_flags) -{ - bio->bi_opf = op | op_flags; -} - static inline bool op_is_write(blk_opf_t op) { return !!(op & (__force blk_opf_t)1); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50e358a19d98..301cf1cf4f2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -22,7 +22,6 @@ #include <linux/blkzoned.h> #include <linux/sched.h> #include <linux/sbitmap.h> -#include <linux/srcu.h> #include <linux/uuid.h> #include <linux/xarray.h> @@ -156,6 +155,7 @@ struct gendisk { unsigned open_partitions; /* number of open partitions */ struct backing_dev_info *bdi; + struct kobject queue_kobj; /* the queue/ directory */ struct kobject *slave_dir; #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED struct list_head slave_bdevs; @@ -311,6 +311,13 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; + + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ + unsigned int dma_alignment; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -431,10 +438,7 @@ struct request_queue { struct gendisk *disk; - /* - * queue kobject - */ - struct kobject kobj; + refcount_t refs; /* * mq queue kobject @@ -456,12 +460,6 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; - /* - * Drivers that set dma_alignment to less than 511 must be prepared to - * handle individual bvec's that are not a multiple of a SECTOR_SIZE - * due to possible offsets. - */ - unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; @@ -543,18 +541,11 @@ struct request_queue { struct mutex debugfs_mutex; bool mq_sysfs_init_done; - - /** - * @srcu: Sleepable RCU. Use as lock when type of the request queue - * is blocking (BLK_MQ_F_BLOCKING). 
Must be the last member - */ - struct srcu_struct srcu[]; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ -#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ @@ -579,6 +570,7 @@ struct request_queue { #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ +#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ #define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ (1UL << QUEUE_FLAG_SAME_COMP) | \ @@ -590,7 +582,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ @@ -619,6 +610,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) +#define blk_queue_skip_tagset_quiesce(q) \ + test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); @@ -839,7 +832,6 @@ void set_capacity(struct gendisk *disk, sector_t size); #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); -int bd_register_pending_holders(struct gendisk *disk); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) @@ -850,10 +842,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { } -static inline int bd_register_pending_holders(struct gendisk *disk) -{ - return 0; -} #endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ dev_t part_devt(struct gendisk *disk, u8 partno); @@ -944,7 +932,6 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); -extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); @@ -1324,7 +1311,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) static inline int queue_dma_alignment(const struct request_queue *q) { - return q ? q->dma_alignment : 511; + return q ? 
q->limits.dma_alignment : 511; } static inline unsigned int bdev_dma_alignment(struct block_device *bdev) @@ -1349,12 +1336,7 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { - unsigned int bits = 8; - do { - bits++; - size >>= 1; - } while (size > 256); - return bits; + return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT; } static inline unsigned int block_size(struct block_device *bdev) @@ -1413,7 +1395,6 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); - char *(*devnode)(struct gendisk *disk, umode_t *mode); /* returns the length of the identifier or a negative errno: */ int (*get_unique_id)(struct gendisk *disk, u8 id[16], enum blk_unique_id id_type); @@ -1458,7 +1439,6 @@ unsigned long bdev_start_io_acct(struct block_device *bdev, void bdev_end_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); -void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0566705c1d4e..3de24cfb7a3d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,7 +27,7 @@ #include <linux/bpfptr.h> #include <linux/btf.h> #include <linux/rcupdate_trace.h> -#include <linux/init.h> +#include <linux/static_call.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -54,6 +54,8 @@ struct cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; extern struct kobject *btf_kobj; +extern struct bpf_mem_alloc bpf_global_ma; +extern bool bpf_global_ma_set; typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64); typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, @@ -85,7 +87,8 @@ struct bpf_map_ops { int (*map_lookup_and_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); - int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr, + int (*map_update_batch)(struct bpf_map *map, struct file *map_file, + const union bpf_attr *attr, union bpf_attr __user *uattr); int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -135,7 +138,7 @@ struct bpf_map_ops { struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner); /* Misc helpers.*/ - int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + int (*map_redirect)(struct bpf_map *map, u64 key, u64 flags); /* map_meta_equal must be implemented for maps that can be * used as an inner map. 
It is a runtime check to ensure @@ -165,38 +168,55 @@ struct bpf_map_ops { }; enum { - /* Support at most 8 pointers in a BPF map value */ - BPF_MAP_VALUE_OFF_MAX = 8, - BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX + - 1 + /* for bpf_spin_lock */ - 1, /* for bpf_timer */ + /* Support at most 10 fields in a BTF type */ + BTF_FIELDS_MAX = 10, }; -enum bpf_kptr_type { - BPF_KPTR_UNREF, - BPF_KPTR_REF, +enum btf_field_type { + BPF_SPIN_LOCK = (1 << 0), + BPF_TIMER = (1 << 1), + BPF_KPTR_UNREF = (1 << 2), + BPF_KPTR_REF = (1 << 3), + BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF, + BPF_LIST_HEAD = (1 << 4), + BPF_LIST_NODE = (1 << 5), }; -struct bpf_map_value_off_desc { +struct btf_field_kptr { + struct btf *btf; + struct module *module; + btf_dtor_kfunc_t dtor; + u32 btf_id; +}; + +struct btf_field_list_head { + struct btf *btf; + u32 value_btf_id; + u32 node_offset; + struct btf_record *value_rec; +}; + +struct btf_field { u32 offset; - enum bpf_kptr_type type; - struct { - struct btf *btf; - struct module *module; - btf_dtor_kfunc_t dtor; - u32 btf_id; - } kptr; + enum btf_field_type type; + union { + struct btf_field_kptr kptr; + struct btf_field_list_head list_head; + }; }; -struct bpf_map_value_off { - u32 nr_off; - struct bpf_map_value_off_desc off[]; +struct btf_record { + u32 cnt; + u32 field_mask; + int spin_lock_off; + int timer_off; + struct btf_field fields[]; }; -struct bpf_map_off_arr { +struct btf_field_offs { u32 cnt; - u32 field_off[BPF_MAP_OFF_ARR_MAX]; - u8 field_sz[BPF_MAP_OFF_ARR_MAX]; + u32 field_off[BTF_FIELDS_MAX]; + u8 field_sz[BTF_FIELDS_MAX]; }; struct bpf_map { @@ -214,10 +234,8 @@ struct bpf_map { u32 max_entries; u64 map_extra; /* any per-map-type extra fields */ u32 map_flags; - int spin_lock_off; /* >=0 valid offset, <0 error */ - struct bpf_map_value_off *kptr_off_tab; - int timer_off; /* >=0 valid offset, <0 error */ u32 id; + struct btf_record *record; int numa_node; u32 btf_key_type_id; u32 btf_value_type_id; @@ -227,7 +245,7 @@ struct bpf_map { struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; - struct bpf_map_off_arr *off_arr; + struct btf_field_offs *field_offs; /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. 
*/ @@ -251,34 +269,86 @@ struct bpf_map { bool frozen; /* write-once; write-protected by freeze_mutex */ }; -static inline bool map_value_has_spin_lock(const struct bpf_map *map) -{ - return map->spin_lock_off >= 0; +static inline const char *btf_field_type_name(enum btf_field_type type) +{ + switch (type) { + case BPF_SPIN_LOCK: + return "bpf_spin_lock"; + case BPF_TIMER: + return "bpf_timer"; + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return "kptr"; + case BPF_LIST_HEAD: + return "bpf_list_head"; + case BPF_LIST_NODE: + return "bpf_list_node"; + default: + WARN_ON_ONCE(1); + return "unknown"; + } +} + +static inline u32 btf_field_type_size(enum btf_field_type type) +{ + switch (type) { + case BPF_SPIN_LOCK: + return sizeof(struct bpf_spin_lock); + case BPF_TIMER: + return sizeof(struct bpf_timer); + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return sizeof(u64); + case BPF_LIST_HEAD: + return sizeof(struct bpf_list_head); + case BPF_LIST_NODE: + return sizeof(struct bpf_list_node); + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static inline u32 btf_field_type_align(enum btf_field_type type) +{ + switch (type) { + case BPF_SPIN_LOCK: + return __alignof__(struct bpf_spin_lock); + case BPF_TIMER: + return __alignof__(struct bpf_timer); + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + return __alignof__(u64); + case BPF_LIST_HEAD: + return __alignof__(struct bpf_list_head); + case BPF_LIST_NODE: + return __alignof__(struct bpf_list_node); + default: + WARN_ON_ONCE(1); + return 0; + } } -static inline bool map_value_has_timer(const struct bpf_map *map) +static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_field_type type) { - return map->timer_off >= 0; + if (IS_ERR_OR_NULL(rec)) + return false; + return rec->field_mask & type; } -static inline bool map_value_has_kptrs(const struct bpf_map *map) +static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj) { - return !IS_ERR_OR_NULL(map->kptr_off_tab); + int i; + + if (!foffs) + return; + for (i = 0; i < foffs->cnt; i++) + memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]); } static inline void check_and_init_map_value(struct bpf_map *map, void *dst) { - if (unlikely(map_value_has_spin_lock(map))) - memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock)); - if (unlikely(map_value_has_timer(map))) - memset(dst + map->timer_off, 0, sizeof(struct bpf_timer)); - if (unlikely(map_value_has_kptrs(map))) { - struct bpf_map_value_off *tab = map->kptr_off_tab; - int i; - - for (i = 0; i < tab->nr_off; i++) - *(u64 *)(dst + tab->off[i].offset) = 0; - } + bpf_obj_init(map->field_offs, dst); } /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and @@ -298,60 +368,72 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) } /* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. 
*/ -static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, bool long_memcpy) +static inline void bpf_obj_memcpy(struct btf_field_offs *foffs, + void *dst, void *src, u32 size, + bool long_memcpy) { u32 curr_off = 0; int i; - if (likely(!map->off_arr)) { + if (likely(!foffs)) { if (long_memcpy) - bpf_long_memcpy(dst, src, round_up(map->value_size, 8)); + bpf_long_memcpy(dst, src, round_up(size, 8)); else - memcpy(dst, src, map->value_size); + memcpy(dst, src, size); return; } - for (i = 0; i < map->off_arr->cnt; i++) { - u32 next_off = map->off_arr->field_off[i]; + for (i = 0; i < foffs->cnt; i++) { + u32 next_off = foffs->field_off[i]; + u32 sz = next_off - curr_off; - memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + memcpy(dst + curr_off, src + curr_off, sz); + curr_off += foffs->field_sz[i] + sz; } - memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); + memcpy(dst + curr_off, src + curr_off, size - curr_off); } static inline void copy_map_value(struct bpf_map *map, void *dst, void *src) { - __copy_map_value(map, dst, src, false); + bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false); } static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src) { - __copy_map_value(map, dst, src, true); + bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true); } -static inline void zero_map_value(struct bpf_map *map, void *dst) +static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size) { u32 curr_off = 0; int i; - if (likely(!map->off_arr)) { - memset(dst, 0, map->value_size); + if (likely(!foffs)) { + memset(dst, 0, size); return; } - for (i = 0; i < map->off_arr->cnt; i++) { - u32 next_off = map->off_arr->field_off[i]; + for (i = 0; i < foffs->cnt; i++) { + u32 next_off = foffs->field_off[i]; + u32 sz = next_off - curr_off; - memset(dst + curr_off, 0, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + memset(dst + curr_off, 0, sz); + curr_off += foffs->field_sz[i] + sz; } - memset(dst + curr_off, 0, map->value_size - curr_off); + memset(dst + curr_off, 0, size - curr_off); +} + +static inline void zero_map_value(struct bpf_map *map, void *dst) +{ + bpf_obj_memzero(map->field_offs, dst, map->value_size); } void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_list_head_free(const struct btf_field *field, void *list_head, + struct bpf_spin_lock *spin_lock); + int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); struct bpf_offload_dev; @@ -420,10 +502,8 @@ enum bpf_type_flag { */ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), - /* MEM was "allocated" from a different helper, and cannot be mixed - * with regular non-MEM_ALLOC'ed MEM types. - */ - MEM_ALLOC = BIT(2 + BPF_BASE_TYPE_BITS), + /* MEM points to BPF ring buffer reservation. */ + MEM_RINGBUF = BIT(2 + BPF_BASE_TYPE_BITS), /* MEM is in user address space. */ MEM_USER = BIT(3 + BPF_BASE_TYPE_BITS), @@ -458,6 +538,43 @@ enum bpf_type_flag { /* Size is known at compile time. */ MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS), + /* MEM is of an allocated object of type in program BTF. This is used to + * tag PTR_TO_BTF_ID allocated using bpf_obj_new. + */ + MEM_ALLOC = BIT(11 + BPF_BASE_TYPE_BITS), + + /* PTR was passed from the kernel in a trusted context, and may be + * passed to KF_TRUSTED_ARGS kfuncs or BPF helper functions. 
+ * Confusingly, this is _not_ the opposite of PTR_UNTRUSTED above. + * PTR_UNTRUSTED refers to a kptr that was read directly from a map + * without invoking bpf_kptr_xchg(). What we really need to know is + * whether a pointer is safe to pass to a kfunc or BPF helper function. + * While PTR_UNTRUSTED pointers are unsafe to pass to kfuncs and BPF + * helpers, they do not cover all possible instances of unsafe + * pointers. For example, a pointer that was obtained from walking a + * struct will _not_ get the PTR_UNTRUSTED type modifier, despite the + * fact that it may be NULL, invalid, etc. This is due to backwards + * compatibility requirements, as this was the behavior that was first + * introduced when kptrs were added. The behavior is now considered + * deprecated, and PTR_UNTRUSTED will eventually be removed. + * + * PTR_TRUSTED, on the other hand, is a pointer that the kernel + * guarantees to be valid and safe to pass to kfuncs and BPF helpers. + * For example, pointers passed to tracepoint arguments are considered + * PTR_TRUSTED, as are pointers that are passed to struct_ops + * callbacks. As alluded to above, pointers that are obtained from + * walking PTR_TRUSTED pointers are _not_ trusted. For example, if a + * struct task_struct *task is PTR_TRUSTED, then accessing + * task->last_wakee will lose the PTR_TRUSTED modifier when it's stored + * in a BPF register. Similarly, pointers passed to certain programs + * types such as kretprobes are not guaranteed to be valid, as they may + * for example contain an object that was recently freed. + */ + PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS), + + /* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */ + MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; @@ -497,7 +614,7 @@ enum bpf_arg_type { ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ - ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ + ARG_PTR_TO_RINGBUF_MEM, /* pointer to dynamically reserved ringbuf memory */ ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ @@ -514,7 +631,6 @@ enum bpf_arg_type { ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, - ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, /* pointer to memory does not need to be initialized, helper function must fill @@ -539,7 +655,7 @@ enum bpf_return_type { RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ - RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */ + RET_PTR_TO_MEM, /* returns a pointer to memory */ RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ __BPF_RET_TYPE_MAX, @@ -549,9 +665,10 @@ enum bpf_return_type { RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | 
RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, - RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, - RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_RINGBUF_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_RINGBUF | RET_PTR_TO_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, + RET_PTR_TO_BTF_ID_TRUSTED = PTR_TRUSTED | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -568,6 +685,7 @@ struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; bool pkt_access; + bool might_sleep; enum bpf_return_type ret_type; union { struct { @@ -657,7 +775,7 @@ enum bpf_reg_type { PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_BUF, /* reg points to a read/write buffer */ PTR_TO_FUNC, /* reg points to a bpf program function */ - PTR_TO_DYNPTR, /* reg points to a dynptr */ + CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */ __BPF_REG_TYPE_MAX, /* Extended reg_types. */ @@ -706,6 +824,7 @@ struct bpf_prog_ops { union bpf_attr __user *uattr); }; +struct bpf_reg_state; struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto * @@ -727,9 +846,8 @@ struct bpf_verifier_ops { struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); int (*btf_struct_access)(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag); }; @@ -855,22 +973,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *orig_call); -/* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); -u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, - struct bpf_tramp_run_ctx *run_ctx); -void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, - struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); +typedef u64 (*bpf_trampoline_enter_t)(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); +bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog); 
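The two selector helpers added here replace the per-program-type __bpf_prog_enter*/__bpf_prog_exit* exports: a trampoline now resolves the matching enter/exit pair once, when it is generated, instead of hard-coding a variant per attach type. As a rough C-level sketch of the calling convention (the real callers are JIT-emitted; run_tramp_prog and the direct bpf_prog_run() call are illustrative stand-ins, not code from this patch):

static u64 run_tramp_prog(struct bpf_prog *prog, const void *ctx)
{
	struct bpf_tramp_run_ctx run_ctx = {};
	bpf_trampoline_enter_t enter_fn = bpf_trampoline_enter(prog);
	bpf_trampoline_exit_t exit_fn = bpf_trampoline_exit(prog);
	u64 start, ret = 0;

	start = enter_fn(prog, &run_ctx);	/* 0 means recursion was detected */
	if (start)
		ret = bpf_prog_run(prog, ctx);	/* stand-in for the JITed program body */
	exit_fn(prog, start, &run_ctx);		/* runs even when the body was skipped */
	return ret;
}

For the recursion-protected variants, a zero cookie from the enter helper makes the trampoline skip the program body while the exit helper still runs, unwinding the per-CPU recursion counter that the enter helper incremented.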
+bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog); struct bpf_ksym { unsigned long start; @@ -954,6 +1068,10 @@ struct bpf_dispatcher { void *rw_image; u32 image_off; struct bpf_ksym ksym; +#ifdef CONFIG_HAVE_STATIC_CALL + struct static_call_key *sc_key; + void *sc_tramp; +#endif }; static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( @@ -971,7 +1089,33 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); -int __init bpf_arch_init_dispatcher_early(void *ip); + +/* + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn + * indirection with a direct call to the bpf program. If the architecture does + * not have STATIC_CALL, avoid a double-indirection. + */ +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __BPF_DISPATCHER_SC_INIT(_name) \ + .sc_key = &STATIC_CALL_KEY(_name), \ + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), + +#define __BPF_DISPATCHER_SC(name) \ + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) + +#define __BPF_DISPATCHER_CALL(name) \ + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) + +#define __BPF_DISPATCHER_UPDATE(_d, _new) \ + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) + +#else +#define __BPF_DISPATCHER_SC_INIT(name) +#define __BPF_DISPATCHER_SC(name) +#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) +#define __BPF_DISPATCHER_UPDATE(_d, _new) +#endif #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ @@ -984,34 +1128,21 @@ int __init bpf_arch_init_dispatcher_early(void *ip); .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ + __BPF_DISPATCHER_SC_INIT(_name##_call) \ } -#define BPF_DISPATCHER_INIT_CALL(_name) \ - static int __init _name##_init(void) \ - { \ - return bpf_arch_init_dispatcher_early(_name##_func); \ - } \ - early_initcall(_name##_init) - -#ifdef CONFIG_X86_64 -#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) -#else -#define BPF_DISPATCHER_ATTRIBUTES -#endif - #define DEFINE_BPF_DISPATCHER(name) \ - notrace BPF_DISPATCHER_ATTRIBUTES \ + __BPF_DISPATCHER_SC(name); \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ { \ - return bpf_func(ctx, insnsi); \ + return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ - BPF_DISPATCHER_INIT(bpf_dispatcher_##name); \ - BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name); + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ @@ -1019,6 +1150,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); const struct bpf_insn *insnsi, \ bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; + #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, @@ -1703,11 +1835,14 @@ void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); -struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u32 offset); -void 
bpf_map_free_kptr_off_tab(struct bpf_map *map); -struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map); -bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b); -void bpf_map_free_kptrs(struct bpf_map *map, void *map_value); +struct btf_field *btf_record_find(const struct btf_record *rec, + u32 offset, enum btf_field_type type); +void btf_record_free(struct btf_record *rec); +void bpf_map_free_record(struct bpf_map *map); +struct btf_record *btf_record_dup(const struct btf_record *rec); +bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); +void bpf_obj_free_timer(const struct btf_record *rec, void *obj); +void bpf_obj_free_fields(const struct btf_record *rec, void *obj); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); @@ -1725,7 +1860,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); -int generic_map_update_batch(struct bpf_map *map, +int generic_map_update_batch(struct bpf_map *map, struct file *map_file, const union bpf_attr *attr, union bpf_attr __user *uattr); int generic_map_delete_batch(struct bpf_map *map, @@ -1774,11 +1909,6 @@ static inline bool bpf_allow_uninit_stack(void) return perfmon_capable(); } -static inline bool bpf_allow_ptr_to_map_access(void) -{ - return perfmon_capable(); -} - static inline bool bpf_bypass_spec_v1(void) { return perfmon_capable(); @@ -2016,9 +2146,9 @@ static inline bool bpf_tracing_btf_ctx_access(int off, int size, return btf_ctx_access(off, size, type, prog, info); } -int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, +int btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag); bool btf_struct_ids_match(struct bpf_verifier_log *log, const struct btf *btf, u32 id, int off, @@ -2031,22 +2161,11 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, const char *func_name, struct btf_func_model *m); -struct bpf_kfunc_arg_meta { - u64 r0_size; - bool r0_rdonly; - int ref_obj_id; - u32 flags; -}; - struct bpf_reg_state; int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); -int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, - const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs, - struct bpf_kfunc_arg_meta *meta); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, @@ -2057,6 +2176,7 @@ struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); void bpf_task_storage_free(struct task_struct *task); +void bpf_cgrp_storage_free(struct cgroup *cgroup); bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog); const struct btf_func_model * bpf_jit_find_kfunc_model(const struct bpf_prog *prog, @@ -2268,9 +2388,8 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) } static inline int btf_struct_access(struct bpf_verifier_log *log, - const struct btf *btf, - const struct btf_type *t, int off, int size, - enum 
bpf_access_type atype, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, u32 *next_btf_id, enum bpf_type_flag *flag) { return -EACCES; @@ -2311,6 +2430,10 @@ static inline bool has_current_bpf_ctx(void) static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog) { } + +static inline void bpf_cgrp_storage_free(struct cgroup *cgroup) +{ +} #endif /* CONFIG_BPF_SYSCALL */ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@ -2535,7 +2658,9 @@ extern const struct bpf_func_proto bpf_this_cpu_ptr_proto; extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; +extern const struct bpf_func_proto bpf_task_storage_get_recur_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; +extern const struct bpf_func_proto bpf_task_storage_delete_recur_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; @@ -2549,6 +2674,8 @@ extern const struct bpf_func_proto bpf_copy_from_user_task_proto; extern const struct bpf_func_proto bpf_set_retval_proto; extern const struct bpf_func_proto bpf_get_retval_proto; extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_get_proto; +extern const struct bpf_func_proto bpf_cgrp_storage_delete_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2701,7 +2828,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type, u32 offset, u32 size); void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); int bpf_dynptr_check_size(u32 size); -u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr); +u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_LSM void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype); @@ -2719,4 +2846,10 @@ struct bpf_key { bool has_ref; }; #endif /* CONFIG_KEYS */ + +static inline bool type_is_alloc(u32 type) +{ + return type & MEM_ALLOC; +} + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 7ea18d4da84b..6d37a40cd90e 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -116,21 +116,22 @@ static struct bpf_local_storage_cache name = { \ .idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock), \ } -u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache); -void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, - u16 idx); - /* Helper functions for bpf_local_storage */ int bpf_local_storage_map_alloc_check(union bpf_attr *attr); -struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr); +struct bpf_map * +bpf_local_storage_map_alloc(union bpf_attr *attr, + struct bpf_local_storage_cache *cache); struct bpf_local_storage_data * bpf_local_storage_lookup(struct bpf_local_storage *local_storage, struct bpf_local_storage_map *smap, bool cacheit_lockit); -void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, +bool bpf_local_storage_unlink_nolock(struct bpf_local_storage *local_storage); + +void bpf_local_storage_map_free(struct bpf_map *map, + struct bpf_local_storage_cache *cache, int __percpu *busy_counter); int bpf_local_storage_map_check_btf(const struct 
bpf_map *map, @@ -141,10 +142,6 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem, - bool uncharge_omem, bool use_trace_rcu); - void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu); void bpf_selem_link_map(struct bpf_local_storage_map *smap, diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 4bcf76a9bb06..1de7ece5d36d 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -28,6 +28,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog); bool bpf_lsm_is_sleepable_hook(u32 btf_id); +bool bpf_lsm_is_trusted(const struct bpf_prog *prog); static inline struct bpf_storage_blob *bpf_inode( const struct inode *inode) @@ -51,6 +52,11 @@ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) return false; } +static inline bool bpf_lsm_is_trusted(const struct bpf_prog *prog) +{ + return false; +} + static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 2c6a4f2562a7..d4ee3ccd3753 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -86,6 +86,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) #ifdef CONFIG_CGROUPS BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CGRP_STORAGE, cgrp_storage_map_ops) #endif #ifdef CONFIG_CGROUP_BPF BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9e1e6965f407..53d175cbaa02 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -19,7 +19,7 @@ */ #define BPF_MAX_VAR_SIZ (1 << 29) /* size of type_str_buf in bpf_verifier. */ -#define TYPE_STR_BUF_LEN 64 +#define TYPE_STR_BUF_LEN 128 /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that @@ -223,6 +223,11 @@ struct bpf_reference_state { * exiting a callback function. */ int callback_ref; + /* Mark the reference state to release the registers sharing the same id + * on bpf_spin_unlock (for nodes that we will lose ownership to but are + * safe to access inside the critical section). + */ + bool release_on_unlock; }; /* state of the program: @@ -268,9 +273,9 @@ struct bpf_id_pair { u32 cur; }; -/* Maximum number of register states that can exist at once */ -#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) #define MAX_CALL_FRAMES 8 +/* Maximum number of register states that can exist at once */ +#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; @@ -323,8 +328,23 @@ struct bpf_verifier_state { u32 branches; u32 insn_idx; u32 curframe; - u32 active_spin_lock; + /* For every reg representing a map value or allocated object pointer, + * we consider the tuple of (ptr, id) for them to be unique in verifier + * context and conside them to not alias each other for the purposes of + * tracking lock state. + */ + struct { + /* This can either be reg->map_ptr or reg->btf. 
If ptr is NULL, + * there's no active lock held, and other fields have no + * meaning. If non-NULL, it indicates that a lock is held and + * id member has the reg->id of the register which can be >= 0. + */ + void *ptr; + /* This will be reg->id */ + u32 id; + } active_lock; bool speculative; + bool active_rcu_lock; /* first and last insn idx of this verifier state */ u32 first_insn_idx; @@ -419,16 +439,20 @@ struct bpf_insn_aux_data { */ struct bpf_loop_inline_state loop_inline_state; }; + u64 obj_new_size; /* remember the size of type passed to bpf_obj_new to rewrite R1 */ + struct btf_struct_meta *kptr_struct_meta; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ + bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */ u8 alu_state; /* used in combination with alu_limit */ /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ bool prune_point; + bool jmp_point; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -508,11 +532,11 @@ struct bpf_verifier_env { bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; - bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; bool bypass_spec_v4; bool seen_direct_write; + bool rcu_tag_supported; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; @@ -589,15 +613,11 @@ int check_ptr_off_reg(struct bpf_verifier_env *env, int check_func_arg_reg_off(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, enum bpf_arg_type arg_type); -int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - u32 regno); int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, u32 mem_size); -bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, - struct bpf_reg_state *reg); -bool is_dynptr_type_expected(struct bpf_verifier_env *env, - struct bpf_reg_state *reg, - enum bpf_arg_type arg_type); +struct bpf_call_arg_meta; +int process_dynptr_func(struct bpf_verifier_env *env, int regno, + enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta); /* this lives here instead of in bpf.h because it needs to dereference tgt_prog */ static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog, @@ -642,10 +662,30 @@ static inline u32 type_flag(u32 type) } /* only use after check_attach_btf_id() */ -static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { return prog->type == BPF_PROG_TYPE_EXT ? 
prog->aux->dst_prog->type : prog->type; } +static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) +{ + switch (resolve_prog_type(prog)) { + case BPF_PROG_TYPE_TRACING: + return prog->expected_attach_type != BPF_TRACE_ITER; + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_LSM: + return false; + default: + return true; + } +} + +#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) + +static inline bool bpf_type_has_unsafe_modifiers(u32 type) +{ + return type_flag(type) & ~BPF_REG_TRUSTED_MODIFIERS; +} + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index f9aababc5d78..5f628f323442 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -6,6 +6,8 @@ #include <linux/types.h> #include <linux/bpfptr.h> +#include <linux/bsearch.h> +#include <linux/btf_ids.h> #include <uapi/linux/btf.h> #include <uapi/linux/bpf.h> @@ -17,40 +19,58 @@ #define KF_RELEASE (1 << 1) /* kfunc is a release function */ #define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ #define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ -/* Trusted arguments are those which are meant to be referenced arguments with - * unchanged offset. It is used to enforce that pointers obtained from acquire - * kfuncs remain unmodified when being passed to helpers taking trusted args. +/* Trusted arguments are those which are guaranteed to be valid when passed to + * the kfunc. It is used to enforce that pointers obtained from either acquire + * kfuncs, or from the main kernel on a tracepoint or struct_ops callback + * invocation, remain unmodified when being passed to helpers taking trusted + * args. * - * Consider - * struct foo { - * int data; - * struct foo *next; - * }; + * Consider, for example, the following new task tracepoint: * - * struct bar { - * int data; - * struct foo f; - * }; + * SEC("tp_btf/task_newtask") + * int BPF_PROG(new_task_tp, struct task_struct *task, u64 clone_flags) + * { + * ... + * } * - * struct foo *f = alloc_foo(); // Acquire kfunc - * struct bar *b = alloc_bar(); // Acquire kfunc + * And the following kfunc: * - * If a kfunc set_foo_data() wants to operate only on the allocated object, it - * will set the KF_TRUSTED_ARGS flag, which will prevent unsafe usage like: + * BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) * - * set_foo_data(f, 42); // Allowed - * set_foo_data(f->next, 42); // Rejected, non-referenced pointer - * set_foo_data(&f->next, 42);// Rejected, referenced, but wrong type - * set_foo_data(&b->f, 42); // Rejected, referenced, but bad offset + * All invocations to the kfunc must pass the unmodified, unwalked task: * - * In the final case, usually for the purposes of type matching, it is deduced - * by looking at the type of the member at the offset, but due to the - * requirement of trusted argument, this deduction will be strict and not done - * for this case. 
+ * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(task->last_wakee); // Rejected, walked task + * + * Programs may also pass referenced tasks directly to the kfunc: + * + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(task); // Allowed, same as above + * bpf_task_acquire(acquired); // Allowed + * bpf_task_acquire(task); // Allowed + * bpf_task_acquire(acquired->last_wakee); // Rejected, walked task + * + * Programs may _not_, however, pass a task from an arbitrary fentry/fexit, or + * kprobe/kretprobe to the kfunc, as BPF cannot guarantee that all of these + * pointers are safe. For example, the following BPF program + * would be rejected: + * + * SEC("kretprobe/free_task") + * int BPF_PROG(free_task_probe, struct task_struct *tsk) + * { + * struct task_struct *acquired; + * + * acquired = bpf_task_acquire(tsk); // Rejected, not a trusted pointer + * bpf_task_release(acquired); + * + * return 0; + * } */ #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */ #define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */ #define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */ +#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */ /* * Return the name of the passed struct, if exists, or halt the build if for @@ -78,6 +98,17 @@ struct btf_id_dtor_kfunc { u32 kfunc_btf_id; }; +struct btf_struct_meta { + u32 btf_id; + struct btf_record *record; + struct btf_field_offs *field_offs; +}; + +struct btf_struct_metas { + u32 cnt; + struct btf_struct_meta types[]; +}; + typedef void (*btf_dtor_kfunc_t)(void *); extern const struct file_operations btf_fops; @@ -163,8 +194,10 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, u32 expected_offset, u32 expected_size); int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); int btf_find_timer(const struct btf *btf, const struct btf_type *t); -struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf, - const struct btf_type *t); +struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, + u32 field_mask, u32 value_size); +int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec); +struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec); bool btf_type_is_void(const struct btf_type *t); s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind); const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, @@ -288,6 +321,11 @@ static inline bool btf_type_is_typedef(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF; } +static inline bool btf_type_is_volatile(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_VOLATILE; +} + static inline bool btf_type_is_func(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; @@ -318,6 +356,16 @@ static inline bool btf_type_is_struct(const struct btf_type *t) return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; } +static inline bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + +static inline bool btf_type_is_array(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; +} + static inline u16 btf_type_vlen(const struct btf_type *t) { return BTF_INFO_VLEN(t->info); @@ -402,9 +450,27 @@ static inline struct btf_param *btf_params(const struct btf_type *t) return (struct btf_param *)(t + 1); } -#ifdef CONFIG_BPF_SYSCALL +static inline int
btf_id_cmp_func(const void *a, const void *b) +{ + const int *pa = a, *pb = b; + + return *pa - *pb; +} + +static inline bool btf_id_set_contains(const struct btf_id_set *set, u32 id) +{ + return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; +} + +static inline void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) +{ + return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); +} + struct bpf_prog; +struct bpf_verifier_log; +#ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); @@ -412,11 +478,21 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, u32 kfunc_btf_id); +u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); +int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); +struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); +const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg); +int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type); +bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) @@ -448,6 +524,26 @@ static inline int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dt { return 0; } +static inline struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) +{ + return NULL; +} +static inline const struct btf_member * +btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) +{ + return NULL; +} +static inline int get_kern_ctx_btf_id(struct bpf_verifier_log *log, + enum bpf_prog_type prog_type) { + return -EINVAL; +} +static inline bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2) +{ + return false; +} #endif static inline bool btf_type_is_struct_ptr(struct btf *btf, const struct btf_type *t) diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index 2aea877d644f..3a4f7cd882ca 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -204,7 +204,7 @@ extern struct btf_id_set8 name; #else -#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[16]; #define BTF_ID(prefix, name) #define BTF_ID_FLAGS(prefix, name, ...) 
#define BTF_ID_UNUSED @@ -265,5 +265,7 @@ MAX_BTF_TRACING_TYPE, }; extern u32 btf_tracing_ids[]; +extern u32 bpf_cgroup_btf_id[]; +extern u32 bpf_local_storage_map_btf_id[]; #endif diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 58f5431a5559..982ba245eb41 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -152,6 +152,22 @@ static inline bool can_is_canxl_dev_mtu(unsigned int mtu) return (mtu >= CANXL_MIN_MTU && mtu <= CANXL_MAX_MTU); } +/* drop skb if it does not contain a valid CAN frame for sending */ +static inline bool can_dev_dropped_skb(struct net_device *dev, struct sk_buff *skb) +{ + struct can_priv *priv = netdev_priv(dev); + + if (priv->ctrlmode & CAN_CTRLMODE_LISTENONLY) { + netdev_info_once(dev, + "interface in listen only mode, dropping skb\n"); + kfree_skb(skb); + dev->stats.tx_dropped++; + return true; + } + + return can_dropped_invalid_skb(dev, skb); +} + void can_setup(struct net_device *dev); struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6e01f10f0d88..8a0d5466c7be 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -507,6 +507,10 @@ struct cgroup { /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_local_storage __rcu *bpf_cgrp_storage; +#endif + /* All ancestors including self */ struct cgroup *ancestors[]; }; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 528bd44b59e2..3410aecffdb4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,7 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; @@ -309,72 +310,25 @@ void css_task_iter_end(struct css_task_iter *it); * Inline functions. */ +#ifdef CONFIG_DEBUG_CGROUP_REF +void css_get(struct cgroup_subsys_state *css); +void css_get_many(struct cgroup_subsys_state *css, unsigned int n); +bool css_tryget(struct cgroup_subsys_state *css); +bool css_tryget_online(struct cgroup_subsys_state *css); +void css_put(struct cgroup_subsys_state *css); +void css_put_many(struct cgroup_subsys_state *css, unsigned int n); +#else +#define CGROUP_REF_FN_ATTRS static inline +#define CGROUP_REF_EXPORT(fn) +#include <linux/cgroup_refcnt.h> +#endif + static inline u64 cgroup_id(const struct cgroup *cgrp) { return cgrp->kn->id; } /** - * css_get - obtain a reference on the specified css - * @css: target css - * - * The caller must already have a reference. - */ -static inline void css_get(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_get(&css->refcnt); -} - -/** - * css_get_many - obtain references on the specified css - * @css: target css - * @n: number of references to get - * - * The caller must already have a reference. 
- */ -static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_get_many(&css->refcnt, n); -} - -/** - * css_tryget - try to obtain a reference on the specified css - * @css: target css - * - * Obtain a reference on @css unless it already has reached zero and is - * being released. This function doesn't care whether @css is on or - * offline. The caller naturally needs to ensure that @css is accessible - * but doesn't have to be holding a reference on it - IOW, RCU protected - * access is good enough for this function. Returns %true if a reference - * count was successfully obtained; %false otherwise. - */ -static inline bool css_tryget(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - return percpu_ref_tryget(&css->refcnt); - return true; -} - -/** - * css_tryget_online - try to obtain a reference on the specified css if online - * @css: target css - * - * Obtain a reference on @css if it's online. The caller naturally needs - * to ensure that @css is accessible but doesn't have to be holding a - * reference on it - IOW, RCU protected access is good enough for this - * function. Returns %true if a reference count was successfully obtained; - * %false otherwise. - */ -static inline bool css_tryget_online(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - return percpu_ref_tryget_live(&css->refcnt); - return true; -} - -/** * css_is_dying - test whether the specified css is dying * @css: target css * @@ -394,31 +348,6 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); } -/** - * css_put - put a css reference - * @css: target css - * - * Put a reference obtained via css_get() and css_tryget_online(). - */ -static inline void css_put(struct cgroup_subsys_state *css) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_put(&css->refcnt); -} - -/** - * css_put_many - put css references - * @css: target css - * @n: number of references to put - * - * Put references obtained via css_get() and css_tryget_online(). - */ -static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) -{ - if (!(css->flags & CSS_NO_REF)) - percpu_ref_put_many(&css->refcnt, n); -} - static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); diff --git a/include/linux/cgroup_refcnt.h b/include/linux/cgroup_refcnt.h new file mode 100644 index 000000000000..2eea0a69ecfc --- /dev/null +++ b/include/linux/cgroup_refcnt.h @@ -0,0 +1,96 @@ +/** + * css_get - obtain a reference on the specified css + * @css: target css + * + * The caller must already have a reference. + */ +CGROUP_REF_FN_ATTRS +void css_get(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_get(&css->refcnt); +} +CGROUP_REF_EXPORT(css_get) + +/** + * css_get_many - obtain references on the specified css + * @css: target css + * @n: number of references to get + * + * The caller must already have a reference. + */ +CGROUP_REF_FN_ATTRS +void css_get_many(struct cgroup_subsys_state *css, unsigned int n) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_get_many(&css->refcnt, n); +} +CGROUP_REF_EXPORT(css_get_many) + +/** + * css_tryget - try to obtain a reference on the specified css + * @css: target css + * + * Obtain a reference on @css unless it already has reached zero and is + * being released. This function doesn't care whether @css is on or + * offline. 
The caller naturally needs to ensure that @css is accessible + * but doesn't have to be holding a reference on it - IOW, RCU protected + * access is good enough for this function. Returns %true if a reference + * count was successfully obtained; %false otherwise. + */ +CGROUP_REF_FN_ATTRS +bool css_tryget(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + return percpu_ref_tryget(&css->refcnt); + return true; +} +CGROUP_REF_EXPORT(css_tryget) + +/** + * css_tryget_online - try to obtain a reference on the specified css if online + * @css: target css + * + * Obtain a reference on @css if it's online. The caller naturally needs + * to ensure that @css is accessible but doesn't have to be holding a + * reference on it - IOW, RCU protected access is good enough for this + * function. Returns %true if a reference count was successfully obtained; + * %false otherwise. + */ +CGROUP_REF_FN_ATTRS +bool css_tryget_online(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + return percpu_ref_tryget_live(&css->refcnt); + return true; +} +CGROUP_REF_EXPORT(css_tryget_online) + +/** + * css_put - put a css reference + * @css: target css + * + * Put a reference obtained via css_get() and css_tryget_online(). + */ +CGROUP_REF_FN_ATTRS +void css_put(struct cgroup_subsys_state *css) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_put(&css->refcnt); +} +CGROUP_REF_EXPORT(css_put) + +/** + * css_put_many - put css references + * @css: target css + * @n: number of references to put + * + * Put references obtained via css_get() and css_tryget_online(). + */ +CGROUP_REF_FN_ATTRS +void css_put_many(struct cgroup_subsys_state *css, unsigned int n) +{ + if (!(css->flags & CSS_NO_REF)) + percpu_ref_put_many(&css->refcnt, n); +} +CGROUP_REF_EXPORT(css_put_many) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 267cd06b54a0..842e72a5348f 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -44,6 +44,7 @@ struct dentry; * * Should be initialized by calling clk_hw_init_rate_request(). * + * @core: Pointer to the struct clk_core affected by this request * @rate: Requested clock rate. This field will be adjusted by * clock drivers according to hardware capabilities. * @min_rate: Minimum rate imposed by clk users. 
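The clk_rate_request kernel-doc above (continued in the next hunk) gains a @core back-reference. As a minimal, hypothetical sketch of the request flow this supports — example_round_rate() is illustrative and not part of this diff, and it is an assumption here that clk_hw_init_rate_request() fills in the new core pointer along with the rate boundaries:

#include <linux/clk-provider.h>

static long example_round_rate(struct clk_hw *hw, unsigned long rate)
{
	struct clk_rate_request req;
	int ret;

	/* Populate req (rate, min_rate, max_rate; with this series,
	 * presumably also the core back-reference) for @hw. */
	clk_hw_init_rate_request(hw, &req, rate);

	/* Ask the parent to adjust the request to what it can deliver. */
	ret = __clk_determine_rate(clk_hw_get_parent(hw), &req);
	if (ret < 0)
		return ret;

	return req.rate;
}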
@@ -55,6 +56,7 @@ struct dentry; * */ struct clk_rate_request { + struct clk_core *core; unsigned long rate; unsigned long min_rate; unsigned long max_rate; diff --git a/include/linux/compat.h b/include/linux/compat.h index 594357881b0b..44b1736c95b5 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -126,11 +126,9 @@ struct compat_tms { #define _COMPAT_NSIG_WORDS (_COMPAT_NSIG / _COMPAT_NSIG_BPW) -#ifndef compat_sigset_t typedef struct { compat_sigset_word sig[_COMPAT_NSIG_WORDS]; } compat_sigset_t; -#endif int set_compat_user_sigmask(const compat_sigset_t __user *umask, size_t sigsetsize); diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f55a37efdb97..7af9e34ec261 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -82,26 +82,21 @@ #define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif -#if __has_attribute(__no_sanitize_address__) -#define __no_sanitize_address __attribute__((no_sanitize_address)) -#else -#define __no_sanitize_address -#endif +#define __no_sanitize_address __attribute__((__no_sanitize_address__)) -#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__) -#define __no_sanitize_thread __attribute__((no_sanitize_thread)) +#if defined(__SANITIZE_THREAD__) +#define __no_sanitize_thread __attribute__((__no_sanitize_thread__)) #else #define __no_sanitize_thread #endif -#if __has_attribute(__no_sanitize_undefined__) -#define __no_sanitize_undefined __attribute__((no_sanitize_undefined)) -#else -#define __no_sanitize_undefined -#endif +#define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__)) +/* + * Only supported since gcc >= 12 + */ #if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__) -#define __no_sanitize_coverage __attribute__((no_sanitize_coverage)) +#define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__)) #else #define __no_sanitize_coverage #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 973a1bfd7ef5..947a60b801db 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -236,6 +236,7 @@ static inline void *offset_to_ptr(const int *off) * bool and also pointer types. */ #define is_signed_type(type) (((type)(-1)) < (__force type)1) +#define is_unsigned_type(type) (!is_signed_type(type)) /* * This is needed in functions which generate the stack canary, see diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index eb0466236661..7c1afe0f4129 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -49,7 +49,8 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # endif # define __iomem # define __percpu BTF_TYPE_TAG(percpu) -# define __rcu +# define __rcu BTF_TYPE_TAG(rcu) + # define __chk_user_ptr(x) (void)0 # define __chk_io_ptr(x) (void)0 /* context/locking */ diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 97cfd13bae51..2606711adb18 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -204,8 +204,6 @@ static struct configfs_bin_attribute _pfx##attr_##_name = { \ * group children. default_groups may coexist alongsize make_group() or * make_item(), but if the group wishes to have only default_groups * children (disallowing mkdir(2)), it need not provide either function. - * If the group has commit(), it supports pending and committed (active) - * items. 
*/ struct configfs_item_operations { void (*release)(struct config_item *); @@ -216,7 +214,6 @@ struct configfs_item_operations { struct configfs_group_operations { struct config_item *(*make_item)(struct config_group *group, const char *name); struct config_group *(*make_group)(struct config_group *group, const char *name); - int (*commit_item)(struct config_item *item); void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); }; diff --git a/include/linux/console.h b/include/linux/console.h index 8c1686e2c233..9cea254b34b8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -15,6 +15,7 @@ #define _LINUX_CONSOLE_H_ 1 #include <linux/atomic.h> +#include <linux/rculist.h> #include <linux/types.h> struct vc_data; @@ -154,14 +155,132 @@ struct console { u64 seq; unsigned long dropped; void *data; - struct console *next; + struct hlist_node node; }; +#ifdef CONFIG_LOCKDEP +extern void lockdep_assert_console_list_lock_held(void); +#else +static inline void lockdep_assert_console_list_lock_held(void) +{ +} +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern bool console_srcu_read_lock_is_held(void); +#else +static inline bool console_srcu_read_lock_is_held(void) +{ + return 1; +} +#endif + +extern int console_srcu_read_lock(void); +extern void console_srcu_read_unlock(int cookie); + +extern void console_list_lock(void) __acquires(console_mutex); +extern void console_list_unlock(void) __releases(console_mutex); + +extern struct hlist_head console_list; + +/** + * console_srcu_read_flags - Locklessly read the console flags + * @con: struct console pointer of console to read flags from + * + * This function provides the necessary READ_ONCE() and data_race() + * notation for locklessly reading the console flags. The READ_ONCE() + * in this function matches the WRITE_ONCE() when @flags are modified + * for registered consoles with console_srcu_write_flags(). + * + * Only use this function to read console flags when locklessly + * iterating the console list via srcu. + * + * Context: Any context. + */ +static inline short console_srcu_read_flags(const struct console *con) +{ + WARN_ON_ONCE(!console_srcu_read_lock_is_held()); + + /* + * Locklessly reading console->flags provides a consistent + * read value because there is at most one CPU modifying + * console->flags and that CPU is using only read-modify-write + * operations to do so. + */ + return data_race(READ_ONCE(con->flags)); +} + +/** + * console_srcu_write_flags - Write flags for a registered console + * @con: struct console pointer of console to write flags to + * @flags: new flags value to write + * + * Only use this function to write flags for registered consoles. It + * requires holding the console_list_lock. + * + * Context: Any context. + */ +static inline void console_srcu_write_flags(struct console *con, short flags) +{ + lockdep_assert_console_list_lock_held(); + + /* This matches the READ_ONCE() in console_srcu_read_flags(). */ + WRITE_ONCE(con->flags, flags); +} + +/* Variant of console_is_registered() when the console_list_lock is held. */ +static inline bool console_is_registered_locked(const struct console *con) +{ + lockdep_assert_console_list_lock_held(); + return !hlist_unhashed(&con->node); +} + /* - * for_each_console() allows you to iterate on each console + * console_is_registered - Check if the console is registered + * @con: struct console pointer of console to check + * + * Context: Process context. 
May sleep while acquiring console list lock. + * Return: true if the console is in the console list, otherwise false. + * + * If false is returned for a console that was previously registered, it + * can be assumed that the console's unregistration is fully completed, + * including the exit() callback after console list removal. + */ +static inline bool console_is_registered(const struct console *con) +{ + bool ret; + + console_list_lock(); + ret = console_is_registered_locked(con); + console_list_unlock(); + return ret; +} + +/** + * for_each_console_srcu() - Iterator over registered consoles + * @con: struct console pointer used as loop cursor + * + * Although SRCU guarantees the console list will be consistent, the + * struct console fields may be updated by other CPUs while iterating. + * + * Requires console_srcu_read_lock to be held. Can be invoked from + * any context. + */ +#define for_each_console_srcu(con) \ + hlist_for_each_entry_srcu(con, &console_list, node, \ + console_srcu_read_lock_is_held()) + +/** + * for_each_console() - Iterator over registered consoles + * @con: struct console pointer used as loop cursor + * + * The console list and the console->flags are immutable while iterating. + * + * Requires console_list_lock to be held. */ -#define for_each_console(con) \ - for (con = console_drivers; con != NULL; con = con->next) +#define for_each_console(con) \ + lockdep_assert_console_list_lock_held(); \ + hlist_for_each_entry(con, &console_list, node) extern int console_set_on_cmdline; extern struct console *early_console; @@ -172,9 +291,9 @@ enum con_flush_mode { }; extern int add_preferred_console(char *name, int idx, char *options); +extern void console_force_preferred_locked(struct console *con); extern void register_console(struct console *); extern int unregister_console(struct console *); -extern struct console *console_drivers; extern void console_lock(void); extern int console_trylock(void); extern void console_unlock(void); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 08a1d3e7e46d..d3eba4360150 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -18,10 +18,10 @@ struct core_vma_metadata { struct coredump_params { const kernel_siginfo_t *siginfo; - struct pt_regs *regs; struct file *file; unsigned long limit; unsigned long mm_flags; + int cpu; loff_t written; loff_t pos; loff_t to_skip; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d5595d57f4e5..6a94a6eaad27 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -1110,10 +1110,10 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int parse_perf_domain(int cpu, const char *list_name, - const char *cell_name) + const char *cell_name, + struct of_phandle_args *args) { struct device_node *cpu_np; - struct of_phandle_args args; int ret; cpu_np = of_cpu_device_node_get(cpu); @@ -1121,41 +1121,44 @@ static inline int parse_perf_domain(int cpu, const char *list_name, return -ENODEV; ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0, - &args); + args); if (ret < 0) return ret; of_node_put(cpu_np); - return args.args[0]; + return 0; } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { - int target_idx; int cpu, ret; + struct of_phandle_args args; - ret = parse_perf_domain(pcpu, list_name, cell_name); + ret = parse_perf_domain(pcpu, 
list_name, cell_name, pargs); if (ret < 0) return ret; - target_idx = ret; cpumask_set_cpu(pcpu, cpumask); for_each_possible_cpu(cpu) { if (cpu == pcpu) continue; - ret = parse_perf_domain(cpu, list_name, cell_name); + ret = parse_perf_domain(cpu, list_name, cell_name, &args); if (ret < 0) continue; - if (target_idx == ret) + if (pargs->np == args.np && pargs->args_count == args.args_count && + !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count)) cpumask_set_cpu(cpu, cpumask); + + of_node_put(args.np); } - return target_idx; + return 0; } #else static inline int cpufreq_boost_trigger_state(int state) @@ -1185,7 +1188,8 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy, } static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name, - const char *cell_name, struct cpumask *cpumask) + const char *cell_name, struct cpumask *cpumask, + struct of_phandle_args *pargs) { return -EOPNOTSUPP; } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f61447913db9..6c6859bfc454 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -69,6 +69,7 @@ enum cpuhp_state { CPUHP_X86_APB_DEAD, CPUHP_X86_MCE_DEAD, CPUHP_VIRT_NET_DEAD, + CPUHP_IBMVNIC_DEAD, CPUHP_SLUB_DEAD, CPUHP_DEBUG_OBJ_DEAD, CPUHP_MM_WRITEBACK_DEAD, @@ -140,6 +141,7 @@ enum cpuhp_state { */ CPUHP_AP_IDLE_DEAD, CPUHP_AP_OFFLINE, + CPUHP_AP_CACHECTRL_STARTING, CPUHP_AP_SCHED_STARTING, CPUHP_AP_RCUTREE_DYING, CPUHP_AP_CPU_PM_STARTING, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 2324ab6f1846..5d1e961f810e 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -714,11 +714,6 @@ static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags) tfm->crt_flags &= ~flags; } -static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) -{ - return tfm->__crt_ctx; -} - static inline unsigned int crypto_tfm_ctx_alignment(void) { struct crypto_tfm *tfm; diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h new file mode 100644 index 000000000000..629e1bdeda44 --- /dev/null +++ b/include/linux/cxl_err.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> + */ + +#ifndef LINUX_CXL_ERR_H +#define LINUX_CXL_ERR_H + +/* CXL RAS Capability Structure, CXL v3.1 sec 8.2.4.16 */ +struct cxl_ras_capability_regs { + u32 uncor_status; + u32 uncor_mask; + u32 uncor_severity; + u32 cor_status; + u32 cor_mask; + u32 cap_control; + u32 header_log[16]; +}; + +#endif /* LINUX_CXL_ERR_H */ diff --git a/include/linux/damon.h b/include/linux/damon.h index 620ada094c3b..ad15a5b88e3a 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -21,7 +21,7 @@ /* Get a random number in [l, r) */ static inline unsigned long damon_rand(unsigned long l, unsigned long r) { - return l + prandom_u32_max(r - l); + return l + get_random_u32_below(r - l); } /** @@ -357,6 +357,7 @@ struct damon_operations { * @after_wmarks_check: Called after each schemes' watermarks check. * @after_sampling: Called after each sampling. * @after_aggregation: Called after each aggregation. + * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. * @private: User private data.
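The @before_damos_apply entry documented above pairs with the function pointer the next hunk adds to struct damon_callback. A hedged sketch of wiring it up; the handler name and the veto semantics (a nonzero return skipping the action for that region) are assumptions for illustration, not part of this diff:

#include <linux/damon.h>
#include <linux/sizes.h>

static int example_before_damos_apply(struct damon_ctx *ctx,
				      struct damon_target *t,
				      struct damon_region *r,
				      struct damos *s)
{
	/* Assumed semantic: returning nonzero vetoes applying the
	 * scheme's action to this region. */
	if (r->ar.end - r->ar.start > SZ_128M)
		return -EINVAL;

	return 0;
}

static void example_register(struct damon_ctx *ctx)
{
	ctx->callback.before_damos_apply = example_before_damos_apply;
}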
* @@ -385,6 +386,10 @@ struct damon_callback { int (*after_wmarks_check)(struct damon_ctx *context); int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); + int (*before_damos_apply)(struct damon_ctx *context, + struct damon_target *target, + struct damon_region *region, + struct damos *scheme); void (*before_terminate)(struct damon_ctx *context); }; diff --git a/include/linux/dax.h b/include/linux/dax.h index ba985333e26b..2b5ecb591059 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -205,6 +205,8 @@ static inline void dax_unlock_mapping_entry(struct address_space *mapping, } #endif +int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, + const struct iomap_ops *ops); int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index f60674692d36..ea2d919fd9c7 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -45,7 +45,7 @@ struct debugfs_u32_array { extern struct dentry *arch_debugfs_dir; -#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ +#define DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ @@ -56,10 +56,16 @@ static const struct file_operations __fops = { \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ - .write = debugfs_attr_write, \ + .write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \ .llseek = no_llseek, \ } +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ + DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) + +#define DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ + DEFINE_DEBUGFS_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) + typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); #if defined(CONFIG_DEBUG_FS) @@ -102,6 +108,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t debugfs_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); +ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, const char *new_name); @@ -254,6 +262,13 @@ static inline ssize_t debugfs_attr_write(struct file *file, return -ENODEV; } +static inline ssize_t debugfs_attr_write_signed(struct file *file, + const char __user *buf, + size_t len, loff_t *ppos) +{ + return -ENODEV; +} + static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, struct dentry *new_dir, char *new_name) { diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 34aab4dd336c..4dc7cda4fd46 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -152,8 +152,8 @@ struct devfreq_stats { * @max_state: count of entry present in the frequency table. * @previous_freq: previously configured frequency value. * @last_status: devfreq user device info, performance statistics - * @data: Private data of the governor. The devfreq framework does not - * touch this. + * @data: private data passed from the devfreq driver to governors; governors should not change it.
+ * @governor_data: private data for governors; the devfreq core doesn't touch it. * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs) * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs) * @scaling_min_freq: Limit minimum frequency requested by OPP interface @@ -193,7 +194,8 @@ struct devfreq { unsigned long previous_freq; struct devfreq_dev_status last_status; - void *data; /* private data for governors */ + void *data; + void *governor_data; struct dev_pm_qos_request user_min_freq_req; struct dev_pm_qos_request user_max_freq_req; diff --git a/include/linux/device.h b/include/linux/device.h index 424b55df0272..7bcfaf54fea3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -197,9 +197,9 @@ void devres_remove_group(struct device *dev, void *id); int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ -void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc; +void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2); void *devm_krealloc(struct device *dev, void *ptr, size_t size, - gfp_t gfp) __must_check; + gfp_t gfp) __must_check __realloc_size(3); __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap) __malloc; __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, @@ -226,7 +226,8 @@ static inline void *devm_kcalloc(struct device *dev, void devm_kfree(struct device *dev, const void *p); char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc; const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp); -void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); +void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) + __realloc_size(3); unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); @@ -378,10 +379,8 @@ struct dev_links_info { * @data: Pointer to MSI device data */ struct dev_msi_info { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - struct irq_domain *domain; -#endif #ifdef CONFIG_GENERIC_MSI_IRQ + struct irq_domain *domain; struct msi_device_data *data; #endif }; @@ -742,7 +741,7 @@ static inline void set_dev_node(struct device *dev, int node) static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ return dev->msi.domain; #else return NULL; @@ -751,7 +750,7 @@ static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ dev->msi.domain = d; #endif } diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 71731796c8c3..6fa8d4e29719 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -327,15 +327,6 @@ struct dma_buf { const struct dma_buf_ops *ops; /** - * @lock: - * - * Used internally to serialize list manipulation, attach/detach and - * vmap/unmap. Note that in many cases this is superseeded by - * dma_resv_lock() on @resv. - */ - struct mutex lock; - - /** * @vmapping_counter: * * Used internally to refcnt the vmaps returned by dma_buf_vmap().
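With the dma_buf->lock mutex removed above, serialization is delegated to the buffer's reservation object, and the next hunk adds *_unlocked wrappers for importers that do not hold it. A sketch of the two calling conventions this implies, assuming the plain helpers now expect dmabuf->resv to be held (illustrative only, not part of this diff):

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>

static int example_vmap(struct dma_buf *dmabuf, struct iosys_map *map)
{
	int ret;

	/* Importer-managed locking: take the reservation lock ourselves,
	 * then use the plain (locked) helper. */
	ret = dma_resv_lock(dmabuf->resv, NULL);
	if (ret)
		return ret;

	ret = dma_buf_vmap(dmabuf, map);

	dma_resv_unlock(dmabuf->resv);
	return ret;
}

/* Equivalent one-call form, where the core takes dmabuf->resv internally:
 *	ret = dma_buf_vmap_unlocked(dmabuf, map);
 */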
@@ -627,9 +618,17 @@ int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); int dma_buf_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction dir); +struct sg_table * +dma_buf_map_attachment_unlocked(struct dma_buf_attachment *attach, + enum dma_data_direction direction); +void dma_buf_unmap_attachment_unlocked(struct dma_buf_attachment *attach, + struct sg_table *sg_table, + enum dma_data_direction direction); int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *, unsigned long); int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); +int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); +void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); #endif /* __DMA_BUF_H__ */ diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 3ed117e299ec..f3664ee12170 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -5,28 +5,8 @@ #ifndef _NET_DSA_8021Q_H #define _NET_DSA_8021Q_H -#include <linux/refcount.h> -#include <linux/types.h> #include <net/dsa.h> - -struct dsa_switch; -struct dsa_port; -struct sk_buff; -struct net_device; - -struct dsa_tag_8021q_vlan { - struct list_head list; - int port; - u16 vid; - refcount_t refcount; -}; - -struct dsa_8021q_context { - struct dsa_switch *ds; - struct list_head vlans; - /* EtherType of RX VID, used for filtering on master interface */ - __be16 proto; -}; +#include <linux/types.h> int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); @@ -38,15 +18,6 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); -struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, - u16 tpid, u16 tci); - -void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, - int *vbid); - -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, - int vbid); - u16 dsa_tag_8021q_bridge_vid(unsigned int bridge_num); u16 dsa_tag_8021q_standalone_vid(const struct dsa_port *dp); diff --git a/include/linux/efi.h b/include/linux/efi.h index 929d559ad41d..4b27519143f5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -371,6 +371,7 @@ void efi_native_runtime_setup(void); #define LOADED_IMAGE_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0xbc62157e, 0x3e33, 0x4fec, 0x99, 0x20, 0x2d, 0x3b, 0x36, 0xd7, 0x50, 0xdf) #define EFI_DEVICE_PATH_PROTOCOL_GUID EFI_GUID(0x09576e91, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c) +#define EFI_DEVICE_PATH_FROM_TEXT_PROTOCOL_GUID EFI_GUID(0x05c99a21, 0xc70f, 0x4ad2, 0x8a, 0x5f, 0x35, 0xdf, 0x33, 0x43, 0xf5, 0x1e) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) #define EFI_UGA_PROTOCOL_GUID EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) @@ -389,6 +390,7 @@ void efi_native_runtime_setup(void); #define EFI_LOAD_FILE2_PROTOCOL_GUID EFI_GUID(0x4006c0c1, 0xfcb3, 0x403e, 0x99, 0x6d, 0x4a, 0x6c, 0x87, 0x24, 0xe0, 0x6d) #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 
0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) +#define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) @@ -403,7 +405,7 @@ void efi_native_runtime_setup(void); * structure that was populated by the stub based on the GOP protocol instance * associated with ConOut */ -#define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) +#define LINUX_EFI_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) #define LINUX_EFI_ARM_CPU_STATE_TABLE_GUID EFI_GUID(0xef79e4aa, 0x3c3d, 0x4989, 0xb9, 0x02, 0x07, 0xa9, 0x43, 0xe5, 0x50, 0xd2) #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) @@ -411,7 +413,6 @@ void efi_native_runtime_setup(void); #define LINUX_EFI_TPM_FINAL_LOG_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define LINUX_EFI_MEMRESERVE_TABLE_GUID EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5, 0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2) #define LINUX_EFI_INITRD_MEDIA_GUID EFI_GUID(0x5568e427, 0x68fc, 0x4f3d, 0xac, 0x74, 0xca, 0x55, 0x52, 0x31, 0xcc, 0x68) -#define LINUX_EFI_ZBOOT_MEDIA_GUID EFI_GUID(0xe565a30d, 0x47da, 0x4dbd, 0xb3, 0x54, 0x9b, 0xb5, 0xc8, 0x4f, 0x8b, 0xe2) #define LINUX_EFI_MOK_VARIABLE_TABLE_GUID EFI_GUID(0xc451ed2b, 0x9694, 0x45d3, 0xba, 0xba, 0xed, 0x9f, 0x89, 0x88, 0xa3, 0x89) #define LINUX_EFI_COCO_SECRET_AREA_GUID EFI_GUID(0xadf956ad, 0xe98c, 0x484c, 0xae, 0x11, 0xb5, 0x1c, 0x7d, 0x33, 0x64, 0x47) #define LINUX_EFI_BOOT_MEMMAP_GUID EFI_GUID(0x800f683f, 0xd08b, 0x423a, 0xa2, 0x93, 0x96, 0x5c, 0x3c, 0x6f, 0xe2, 0xb4) @@ -706,18 +707,10 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); -extern int __init efi_memmap_alloc(unsigned int num_entries, - struct efi_memory_map_data *data); -extern void __efi_memmap_free(u64 phys, unsigned long size, - unsigned long flags); +extern int __init __efi_memmap_init(struct efi_memory_map_data *data); extern int __init efi_memmap_init_early(struct efi_memory_map_data *data); extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size); extern void __init efi_memmap_unmap(void); -extern int __init efi_memmap_install(struct efi_memory_map_data *data); -extern int __init efi_memmap_split_count(efi_memory_desc_t *md, - struct range *range); -extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap, - void *buf, struct efi_mem_range *mem); #ifdef CONFIG_EFI_ESRT extern void __init efi_esrt_init(void); @@ -748,12 +741,6 @@ extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; extern bool efi_poweroff_required(void); -#ifdef CONFIG_EFI_FAKE_MEMMAP -extern void __init efi_fake_memmap(void); -#else -static inline void efi_fake_memmap(void) { } -#endif - extern unsigned long efi_mem_attr_table; /* @@ -1011,6 +998,11 @@ struct efi_mem_mapped_dev_path { u64 ending_addr; } __packed; +struct 
efi_file_path_dev_path { + struct efi_generic_dev_path header; + efi_char16_t filename[]; +} __packed; + struct efi_dev_path { union { struct efi_generic_dev_path header; @@ -1097,34 +1089,6 @@ extern int efi_capsule_update(efi_capsule_header_t *capsule, static inline bool efi_capsule_pending(int *reset_type) { return false; } #endif -#ifdef CONFIG_EFI_RUNTIME_MAP -int efi_runtime_map_init(struct kobject *); -int efi_get_runtime_map_size(void); -int efi_get_runtime_map_desc_size(void); -int efi_runtime_map_copy(void *buf, size_t bufsz); -#else -static inline int efi_runtime_map_init(struct kobject *kobj) -{ - return 0; -} - -static inline int efi_get_runtime_map_size(void) -{ - return 0; -} - -static inline int efi_get_runtime_map_desc_size(void) -{ - return 0; -} - -static inline int efi_runtime_map_copy(void *buf, size_t bufsz) -{ - return 0; -} - -#endif - #ifdef CONFIG_EFI extern bool efi_runtime_disabled(void); #else @@ -1169,8 +1133,6 @@ void efi_check_for_embedded_firmwares(void); static inline void efi_check_for_embedded_firmwares(void) { } #endif -efi_status_t efi_random_get_seed(void); - #define arch_efi_call_virt(p, f, args...) ((p)->f(args)) /* diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 346a8b56cdc8..9ec81290e3c8 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -88,22 +88,13 @@ static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* { #if defined (ELF_CORE_COPY_TASK_REGS) return ELF_CORE_COPY_TASK_REGS(t, elfregs); -#elif defined (task_pt_regs) +#else elf_core_copy_regs(elfregs, task_pt_regs(t)); #endif return 0; } -extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); - -static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_regs *regs, elf_fpregset_t *fpu) -{ -#ifdef ELF_CORE_COPY_FPREGS - return ELF_CORE_COPY_FPREGS(t, fpu); -#else - return dump_fpu(regs, fpu); -#endif -} +int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu); #ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 99dc7bfbcd3c..9e0a76fc7de9 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -125,6 +125,20 @@ struct ethtool_link_ext_state_info { }; }; +struct ethtool_link_ext_stats { + /* Custom Linux statistic for PHY level link down events. + * In a simpler world it should be equal to netdev->carrier_down_count; + * unfortunately netdev also counts local reconfigurations which don't + * actually take the physical link down, not to mention NC-SI which, + * if present, keeps the link up regardless of host state. + * This statistic counts when the PHY _actually_ went down, or lost link. + * + * Note that we need u64 for ethtool_stats_init() and comparisons + * to ETHTOOL_STAT_NOT_SET, but only u32 is exposed to the user. + */ + u64 link_down_events; +}; + /** * ethtool_rxfh_indir_default - get default value for RX flow hash indirection * @index: Index in RX flow hash indirection table @@ -459,10 +473,10 @@ struct ethtool_module_power_mode_params { * parameter. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. - * @get_drvinfo: Report driver/device information. Should only set the - * @driver, @version, @fw_version and @bus_info fields. If not - * implemented, the @driver and @bus_info fields will be filled in - * according to the netdev's parent device. + * @get_drvinfo: Report driver/device information.
Modern drivers no + * longer have to implement this callback. Most fields are + * correctly filled in by the core using system information, or + * populated using other driver operations. * @get_regs_len: Get buffer length required for @get_regs * @get_regs: Get device registers * @get_wol: Report whether Wake-on-Lan is enabled @@ -481,6 +495,7 @@ struct ethtool_module_power_mode_params { * do not attach ext_substate attribute to netlink message). If link_ext_state * and link_ext_substate are unknown, return -ENODATA. If not implemented, * link_ext_state and link_ext_substate will not be sent to userspace. + * @get_link_ext_stats: Read extra link-related counters. * @get_eeprom_len: Read range of EEPROM addresses for validation of * @get_eeprom and @set_eeprom requests. * Returns 0 if device does not support EEPROM access. @@ -652,6 +667,8 @@ struct ethtool_ops { u32 (*get_link)(struct net_device *); int (*get_link_ext_state)(struct net_device *, struct ethtool_link_ext_state_info *); + void (*get_link_ext_stats)(struct net_device *dev, + struct ethtool_link_ext_stats *stats); int (*get_eeprom_len)(struct net_device *); int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 30eb30d6909b..36a486505b08 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -40,6 +40,7 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); +__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt); @@ -61,7 +62,13 @@ static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) return ERR_PTR(-ENOSYS); } -static inline int eventfd_signal(struct eventfd_ctx *ctx, int n) +static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n) +{ + return -ENOSYS; +} + +static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, + unsigned mask) { return -ENOSYS; } diff --git a/include/linux/evm.h b/include/linux/evm.h index aa63e0b3c0a2..7a9ee2157f69 100644 --- a/include/linux/evm.h +++ b/include/linux/evm.h @@ -35,6 +35,27 @@ extern int evm_inode_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *xattr_name); extern void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name); +static inline void evm_inode_post_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + evm_inode_post_removexattr(dentry, acl_name); +} +extern int evm_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); +static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return evm_inode_set_acl(mnt_userns, dentry, acl_name, NULL); +} +static inline void evm_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ + return evm_inode_post_setxattr(dentry, acl_name, NULL, 0); +} extern int evm_inode_init_security(struct inode *inode, const struct xattr *xattr_array, struct xattr *evm); @@ -108,6 +129,34 @@ static inline void evm_inode_post_removexattr(struct dentry *dentry, return; } +static inline void evm_inode_post_remove_acl(struct user_namespace 
*mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return; +} + +static inline int evm_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl) +{ + return 0; +} + +static inline int evm_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} + +static inline void evm_inode_post_set_acl(struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ + return; +} + static inline int evm_inode_init_security(struct inode *inode, const struct xattr *xattr_array, struct xattr *evm) diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ } #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/include/linux/fb.h b/include/linux/fb.h index bcb8658f5b64..96b96323e9cb 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -803,6 +803,15 @@ extern int fb_find_mode(struct fb_var_screeninfo *var, const struct fb_videomode *default_mode, unsigned int default_bpp); +#if defined(CONFIG_VIDEO_NOMODESET) +bool fb_modesetting_disabled(const char *drvname); +#else +static inline bool fb_modesetting_disabled(const char *drvname) +{ + return false; +} +#endif + /* Convenience logging macros */ #define fb_err(fb_info, fmt, ...) 
\ pr_err("fb%d: " fmt, (fb_info)->node, ##__VA_ARGS__) diff --git a/include/linux/filter.h b/include/linux/filter.h index efc42a6e3aed..bf701976056e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -568,10 +568,10 @@ struct sk_filter { DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key); extern struct mutex nf_conn_btf_access_lock; -extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct btf *btf, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, u32 *next_btf_id, - enum bpf_type_flag *flag); +extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype, + u32 *next_btf_id, enum bpf_type_flag *flag); typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx, const struct bpf_insn *insnsi, @@ -643,13 +643,13 @@ struct bpf_nh_params { }; struct bpf_redirect_info { - u32 flags; - u32 tgt_index; + u64 tgt_index; void *tgt_value; struct bpf_map *map; + u32 flags; + u32 kern_flags; u32 map_id; enum bpf_map_type map_type; - u32 kern_flags; struct bpf_nh_params nh; }; @@ -1504,7 +1504,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol, } #endif /* IS_ENABLED(CONFIG_IPV6) */ -static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, +static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index, u64 flags, const u64 flag_mask, void *lookup_elem(struct bpf_map *map, u32 key)) { @@ -1515,7 +1515,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind if (unlikely(flags & ~(action_mask | flag_mask))) return XDP_ABORTED; - ri->tgt_value = lookup_elem(map, ifindex); + ri->tgt_value = lookup_elem(map, index); if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) { /* If the lookup fails we want to clear out the state in the * redirect_info struct completely, so that if an eBPF program @@ -1527,7 +1527,7 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind return flags & action_mask; } - ri->tgt_index = ifindex; + ri->tgt_index = index; ri->map_id = map->id; ri->map_type = map->map_type; diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 76d2b3ebad84..fac37680ffe7 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -135,6 +135,7 @@ enum pm_ret_status { }; enum pm_ioctl_id { + IOCTL_SET_TAPDELAY_BYPASS = 4, IOCTL_SD_DLL_RESET = 6, IOCTL_SET_SD_TAPDELAY = 7, IOCTL_SET_PLL_FRAC_MODE = 8, @@ -389,6 +390,18 @@ enum zynqmp_pm_shutdown_subtype { ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM = 2, }; +enum tap_delay_signal_type { + PM_TAPDELAY_NAND_DQS_IN = 0, + PM_TAPDELAY_NAND_DQS_OUT = 1, + PM_TAPDELAY_QSPI = 2, + PM_TAPDELAY_MAX = 3, +}; + +enum tap_delay_bypass_ctrl { + PM_TAPDELAY_BYPASS_DISABLE = 0, + PM_TAPDELAY_BYPASS_ENABLE = 1, +}; + enum ospi_mux_select_type { PM_OSPI_MUX_SEL_DMA = 0, PM_OSPI_MUX_SEL_LINEAR = 1, @@ -484,6 +497,7 @@ int zynqmp_pm_write_ggs(u32 index, u32 value); int zynqmp_pm_read_ggs(u32 index, u32 *value); int zynqmp_pm_write_pggs(u32 index, u32 value); int zynqmp_pm_read_pggs(u32 index, u32 *value); +int zynqmp_pm_set_tapdelay_bypass(u32 index, u32 value); int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype); int zynqmp_pm_set_boot_health_status(u32 value); int zynqmp_pm_pinctrl_request(const u32 pin); @@ -696,6 +710,11 @@ static inline int zynqmp_pm_read_pggs(u32 index, u32 *value) return -ENODEV; } +static 
inline int zynqmp_pm_set_tapdelay_bypass(u32 index, u32 value) +{ + return -ENODEV; +} + static inline int zynqmp_pm_system_shutdown(const u32 type, const u32 subtype) { return -ENODEV; diff --git a/include/linux/fixp-arith.h b/include/linux/fixp-arith.h index 281cb4f83dbe..e485fb0c1201 100644 --- a/include/linux/fixp-arith.h +++ b/include/linux/fixp-arith.h @@ -2,6 +2,7 @@ #ifndef _FIXP_ARITH_H #define _FIXP_ARITH_H +#include <linux/bug.h> #include <linux/math64.h> /* diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 1067a8450826..7cad8bb031e9 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning(" #define __compiletime_strlen(p) \ ({ \ - unsigned char *__p = (unsigned char *)(p); \ + char *__p = (char *)(p); \ size_t __ret = SIZE_MAX; \ size_t __p_size = __member_size(p); \ if (__p_size != SIZE_MAX && \ @@ -119,13 +119,13 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) * Instead, please choose an alternative, so that the expectation * of @p's contents is unambiguous: * - * +--------------------+-----------------+------------+ - * | @p needs to be: | padded to @size | not padded | - * +====================+=================+============+ - * | NUL-terminated | strscpy_pad() | strscpy() | - * +--------------------+-----------------+------------+ - * | not NUL-terminated | strtomem_pad() | strtomem() | - * +--------------------+-----------------+------------+ + * +--------------------+--------------------+------------+ + * | **p** needs to be: | padded to **size** | not padded | + * +====================+====================+============+ + * | NUL-terminated | strscpy_pad() | strscpy() | + * +--------------------+--------------------+------------+ + * | not NUL-terminated | strtomem_pad() | strtomem() | + * +--------------------+--------------------+------------+ * * Note strscpy*()'s differing return values for detecting truncation, * and strtomem*()'s expectation that the destination is marked with @@ -144,6 +144,21 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size) return __underlying_strncpy(p, q, size); } +/** + * strcat - Append a string to an existing string + * + * @p: pointer to NUL-terminated string to append to + * @q: pointer to NUL-terminated source string to append from + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * read and write overflows, this is only possible when the + * destination buffer size is known to the compiler. Prefer + * building the string with formatting, via scnprintf() or similar. + * At the very least, use strncat(). + * + * Returns @p. + * + */ __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2) char *strcat(char * const POS p, const char *q) { @@ -157,6 +172,16 @@ char *strcat(char * const POS p, const char *q) } extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); +/** + * strnlen - Return bounded count of characters in a NUL-terminated string + * + * @p: pointer to NUL-terminated string to count. + * @maxlen: maximum number of characters to count. + * + * Returns number of characters in @p (NOT including the final NUL), or + * @maxlen, if no NUL has been found up to there. 
+ * + */ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen) { size_t p_size = __member_size(p); @@ -182,6 +207,19 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size * possible for strlen() to be used on compile-time strings for use in * static initializers (i.e. as a constant expression). */ +/** + * strlen - Return count of characters in a NUL-terminated string + * + * @p: pointer to NUL-terminated string to count. + * + * Do not use this function unless the string length is known at + * compile-time. When @p is unterminated, this function may crash + * or return unexpected counts that could lead to memory content + * exposures. Prefer strnlen(). + * + * Returns number of characters in @p (NOT including the final NUL). + * + */ #define strlen(p) \ __builtin_choose_expr(__is_constexpr(__builtin_strlen(p)), \ __builtin_strlen(p), __fortify_strlen(p)) @@ -200,8 +238,26 @@ __kernel_size_t __fortify_strlen(const char * const POS p) return ret; } -/* defined after fortified strlen to reuse it */ +/* Defined after fortified strlen() to reuse it. */ extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); +/** + * strlcpy - Copy a string into another string buffer + * + * @p: pointer to destination of copy + * @q: pointer to NUL-terminated source string to copy + * @size: maximum number of bytes to write at @p + * + * If strlen(@q) >= @size, the copy of @q will be truncated at + * @size - 1 bytes. @p will always be NUL-terminated. + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * over-reads when calculating strlen(@q), it is still possible. + * Prefer strscpy(), though note its different return values for + * detecting truncation. + * + * Returns total number of bytes written to @p, including terminating NUL. + * + */ __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size) { size_t p_size = __member_size(p); @@ -227,8 +283,32 @@ __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, si return q_len; } -/* defined after fortified strnlen to reuse it */ +/* Defined after fortified strnlen() to reuse it. */ extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy); +/** + * strscpy - Copy a C-string into a sized buffer + * + * @p: Where to copy the string to + * @q: Where to copy the string from + * @size: Size of destination buffer + * + * Copy the source string @q, or as much of it as fits, into the destination + * @p buffer. The behavior is undefined if the string buffers overlap. The + * destination @p buffer is always NUL terminated, unless it's zero-sized. + * + * Preferred to strlcpy() since the API doesn't require reading memory + * from the source @q string beyond the specified @size bytes, and since + * the return value is easier to error-check than strlcpy()'s. + * In addition, the implementation is robust to the string changing out + * from underneath it, unlike the current strlcpy() implementation. + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zero padded. If padding is desired please use strscpy_pad(). + * + * Returns the number of characters copied in @p (not including the + * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
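[Editor's aside: the kernel-doc above stresses that strscpy()'s single return value is easier to error-check than strlcpy()'s. A minimal sketch of the intended calling pattern; the destination buffer and source names here are hypothetical:

	char name[16];		/* hypothetical destination buffer */
	ssize_t ret;

	ret = strscpy(name, src, sizeof(name));
	if (ret == -E2BIG)
		/* src was truncated (or the buffer was zero-sized) */
		pr_warn("name truncated to %zu bytes\n", sizeof(name) - 1);
]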
+ */ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size) { size_t len; @@ -247,6 +327,16 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s if (__compiletime_lessthan(p_size, size)) __write_overflow(); + /* Short-circuit for compile-time known-safe lengths. */ + if (__compiletime_lessthan(p_size, SIZE_MAX)) { + len = __compiletime_strlen(q); + + if (len < SIZE_MAX && __compiletime_lessthan(len, size)) { + __underlying_memcpy(p, q, len + 1); + return len; + } + } + /* * This call protects from read overflow, because len will default to q * length if it smaller than size. @@ -274,7 +364,26 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s return __real_strscpy(p, q, len); } -/* defined after fortified strlen and strnlen to reuse them */ +/** + * strncat - Append a string to an existing string + * + * @p: pointer to NUL-terminated string to append to + * @q: pointer to source string to append from + * @count: Maximum bytes to read from @q + * + * Appends at most @count bytes from @q (stopping at the first + * NUL byte) after the NUL-terminated string at @p. @p will be + * NUL-terminated. + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * read and write overflows, this is only possible when the sizes + * of @p and @q are known to the compiler. Prefer building the + * string with formatting, via scnprintf() or similar. + * + * Returns @p. + * + */ +/* Defined after fortified strlen() and strnlen() to reuse them. */ __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3) char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count) { @@ -573,7 +682,8 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size) return __real_memchr_inv(p, c, size); } -extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); +extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup) + __realloc_size(2); __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp) { size_t p_size = __struct_size(p); @@ -585,6 +695,20 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp return __real_kmemdup(p, size, gfp); } +/** + * strcpy - Copy a string into another string buffer + * + * @p: pointer to destination of copy + * @q: pointer to NUL-terminated source string to copy + * + * Do not use this function. While FORTIFY_SOURCE tries to avoid + * overflows, this is only possible when the sizes of @q and @p are + * known to the compiler. Prefer strscpy(), though note its different + * return values for detecting truncation. + * + * Returns @p. + * + */ /* Defined after fortified strlen to reuse it. */ __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2) char *strcpy(char * const POS p, const char * const POS q) diff --git a/include/linux/fs.h b/include/linux/fs.h index e654435f1651..066555ad1bf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -560,8 +560,8 @@ struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) /* * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to - * cache the ACL. This also means that ->get_acl() can be called in RCU mode - * with the LOOKUP_RCU flag. + * cache the ACL. This also means that ->get_inode_acl() can be called in RCU + * mode with the LOOKUP_RCU flag. 
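[Editor's aside: the strcat()/strncat()/strcpy() kernel-docs above all point callers at bounded formatted printing instead. The usual replacement idiom relies on scnprintf() never writing more than the remaining space and returning the number of characters actually stored; msg, name and state below are hypothetical:

	char msg[64];
	int pos = 0;

	pos += scnprintf(msg + pos, sizeof(msg) - pos, "dev %s", name);
	pos += scnprintf(msg + pos, sizeof(msg) - pos, " state %d", state);
]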
*/ #define ACL_DONT_CACHE ((void *)(-3)) @@ -1131,9 +1131,8 @@ struct file_lock_context { /* The following constant reflects the upper bound of the file/locking space */ #ifndef OFFSET_MAX -#define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) -#define OFFSET_MAX INT_LIMIT(loff_t) -#define OFFT_OFFSET_MAX INT_LIMIT(off_t) +#define OFFSET_MAX type_max(loff_t) +#define OFFT_OFFSET_MAX type_max(off_t) #endif extern void send_sigio(struct fown_struct *fown, int fd, int band); @@ -1170,6 +1169,7 @@ extern int locks_delete_block(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +bool vfs_inode_has_locks(struct inode *inode); extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec64 *time); @@ -1186,6 +1186,13 @@ extern void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files); extern bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner); + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return smp_load_acquire(&inode->i_flctx); +} + #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1284,6 +1291,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline bool vfs_inode_has_locks(struct inode *inode) +{ + return false; +} + static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { return -ENOLCK; @@ -1326,6 +1338,13 @@ static inline bool locks_owner_has_blockers(struct file_lock_context *flctx, { return false; } + +static inline struct file_lock_context * +locks_inode_context(const struct inode *inode) +{ + return NULL; +} + #endif /* !CONFIG_FILE_LOCKING */ static inline struct inode *file_inode(const struct file *f) @@ -1613,23 +1632,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) } /** - * i_uid_into_mnt - map an inode's i_uid down into a mnt_userns - * @mnt_userns: user namespace of the mount the inode was found from - * @inode: inode to map - * - * Note, this will eventually be removed completely in favor of the type-safe - * i_uid_into_vfsuid(). - * - * Return: the inode's i_uid mapped down according to @mnt_userns. - * If the inode's i_uid has no mapping INVALID_UID is returned. - */ -static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns, - const struct inode *inode) -{ - return AS_KUIDT(make_vfsuid(mnt_userns, i_user_ns(inode), inode->i_uid)); -} - -/** * i_uid_into_vfsuid - map an inode's i_uid down into a mnt_userns * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map @@ -1682,23 +1684,6 @@ static inline void i_uid_update(struct user_namespace *mnt_userns, } /** - * i_gid_into_mnt - map an inode's i_gid down into a mnt_userns - * @mnt_userns: user namespace of the mount the inode was found from - * @inode: inode to map - * - * Note, this will eventually be removed completely in favor of the type-safe - * i_gid_into_vfsgid(). - * - * Return: the inode's i_gid mapped down according to @mnt_userns. - * If the inode's i_gid has no mapping INVALID_GID is returned. 
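[Editor's aside: the new locks_inode_context() helper above wraps the smp_load_acquire() needed to observe a fully published i_flctx. A sketch of a lockless caller combining it with vfs_inode_has_locks(), purely illustrative:

	struct file_lock_context *ctx;

	if (!vfs_inode_has_locks(inode))
		return;	/* nothing to inspect */

	ctx = locks_inode_context(inode);	/* acquire-paired load */
	if (ctx) {
		spin_lock(&ctx->flc_lock);
		/* walk ctx->flc_posix / ctx->flc_flock as needed */
		spin_unlock(&ctx->flc_lock);
	}
]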
- */ -static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns, - const struct inode *inode) -{ - return AS_KGIDT(make_vfsgid(mnt_userns, i_user_ns(inode), inode->i_gid)); -} - -/** * i_gid_into_vfsgid - map an inode's i_gid down into a mnt_userns * @mnt_userns: user namespace of the mount the inode was found from * @inode: inode to map @@ -2089,6 +2074,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) +/* + * These flags control the behavior of vfs_copy_file_range(). + * They are not available to the user via syscall. + * + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops + */ +#define COPY_FILE_SPLICE (1 << 0) + struct iov_iter; struct io_uring_cmd; @@ -2142,7 +2135,7 @@ struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct user_namespace *, struct inode *, int); - struct posix_acl * (*get_acl)(struct inode *, int, bool); + struct posix_acl * (*get_inode_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); @@ -2172,7 +2165,9 @@ struct inode_operations { umode_t create_mode); int (*tmpfile) (struct user_namespace *, struct inode *, struct file *, umode_t); - int (*set_acl)(struct user_namespace *, struct inode *, + struct posix_acl *(*get_acl)(struct user_namespace *, struct dentry *, + int); + int (*set_acl)(struct user_namespace *, struct dentry *, struct posix_acl *, int); int (*fileattr_set)(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); @@ -2732,18 +2727,22 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file) return mnt_user_ns(file->f_path.mnt); } +static inline struct mnt_idmap *file_mnt_idmap(struct file *file) +{ + return mnt_idmap(file->f_path.mnt); +} + /** * is_idmapped_mnt - check whether a mount is mapped * @mnt: the mount to check * - * If @mnt has an idmapping attached different from the - * filesystem's idmapping then @mnt is mapped. + * If @mnt has an idmapping other than @nop_mnt_idmap attached to it then @mnt is mapped. * * Return: true if mount is mapped, false if not. */ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) { - return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns; + return mnt_idmap(mnt) != &nop_mnt_idmap; } extern long vfs_truncate(const struct path *, loff_t); @@ -3104,7 +3103,7 @@ extern void __destroy_inode(struct inode *); extern struct inode *new_inode_pseudo(struct super_block *sb); extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); -extern int should_remove_suid(struct dentry *); +extern int setattr_should_drop_suidgid(struct user_namespace *, struct inode *); extern int file_remove_privs(struct file *); /* @@ -3485,7 +3484,7 @@ void simple_transaction_set(struct file *file, size_t n); * All attributes contain a text representation of a numeric value * that are accessed with the get() and set() functions. */ -#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ +#define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ __simple_attr_check_format(__fmt, 0ull); \ @@ -3496,10 +3495,16 @@ static const struct file_operations __fops = { \ .open = __fops ## _open, \ .release = simple_attr_release, \ .read = simple_attr_read, \ - .write = (__is_signed) ? 
simple_attr_write_signed : simple_attr_write, \ .llseek = generic_file_llseek, \ } +#define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ + DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false) + +#define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt) \ + DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true) + static inline __printf(1, 2) void __simple_attr_check_format(const char *fmt, ...) { @@ -3514,6 +3519,8 @@ ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); +ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); @@ -3527,7 +3534,7 @@ int __init list_bdev_fs_names(char *buf, size_t size); static inline bool is_sxid(umode_t mode) { - return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); + return mode & (S_ISUID | S_ISGID); } static inline int check_sticky(struct user_namespace *mnt_userns, diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 13fa6f3df8e4..5469ffee21c7 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -99,7 +99,7 @@ struct fs_context { const struct cred *cred; /* The mounter's credentials */ struct p_log log; /* Logging buffer */ const char *source; /* The source name (eg. dev path) */ - void *security; /* Linux S&M options */ + void *security; /* LSM options */ void *s_fs_info; /* Proposed s_fs_info */ unsigned int sb_flags; /* Proposed superblock flags (SB_*) */ unsigned int sb_flags_mask; /* Superblock flags that were changed */ @@ -145,20 +145,6 @@ extern void fc_drop_locked(struct fs_context *fc); int reconfigure_single(struct super_block *s, int flags, void *data); -/* - * sget() wrappers to be called from the ->get_tree() op. - */ -enum vfs_get_super_keying { - vfs_get_single_super, /* Only one such superblock may exist */ - vfs_get_single_reconf_super, /* As above, but reconfigure if it exists */ - vfs_get_keyed_super, /* Superblocks with different s_fs_info keys may exist */ - vfs_get_independent_super, /* Multiple independent superblocks may exist */ -}; -extern int vfs_get_super(struct fs_context *fc, - enum vfs_get_super_keying keying, - int (*fill_super)(struct super_block *sb, - struct fs_context *fc)); - extern int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, struct fs_context *fc)); diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index f103c91139d4..01542c4b87a2 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -76,6 +76,7 @@ static inline int fs_parse(struct fs_context *fc, extern int fs_lookup_param(struct fs_context *fc, struct fs_parameter *param, bool want_bdev, + unsigned int flags, struct path *_path); extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 36e5dd84cf59..8e312c8323a8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -75,7 +75,7 @@ struct fscache_volume { atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int key_hash; /* Hash of key string */ - char *key; /* Volume ID, eg. "afs@example.com@1234" */ + u8 *key; /* Volume ID, eg. 
"afs@example.com@1234" */ struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ struct hlist_bl_node hash_link; /* Link in hash table */ struct work_struct work; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 62557d4bffc2..99f1146614c0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -37,9 +37,10 @@ extern void ftrace_boot_snapshot(void); static inline void ftrace_boot_snapshot(void) { } #endif -#ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops; struct ftrace_regs; + +#ifdef CONFIG_FUNCTION_TRACER /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that @@ -110,12 +111,11 @@ struct ftrace_regs { #define arch_ftrace_get_regs(fregs) (&(fregs)->regs) /* - * ftrace_instruction_pointer_set() is to be defined by the architecture - * if to allow setting of the instruction pointer from the ftrace_regs - * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports - * live kernel patching. + * ftrace_regs_set_instruction_pointer() is to be defined by the architecture + * if to allow setting of the instruction pointer from the ftrace_regs when + * HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports live kernel patching. */ -#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0) +#define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) @@ -126,6 +126,35 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs return arch_ftrace_get_regs(fregs); } +/* + * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs. + * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs. + */ +static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) +{ + if (IS_ENABLED(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)) + return true; + + return ftrace_get_regs(fregs) != NULL; +} + +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +#define ftrace_regs_get_instruction_pointer(fregs) \ + instruction_pointer(ftrace_get_regs(fregs)) +#define ftrace_regs_get_argument(fregs, n) \ + regs_get_kernel_argument(ftrace_get_regs(fregs), n) +#define ftrace_regs_get_stack_pointer(fregs) \ + kernel_stack_pointer(ftrace_get_regs(fregs)) +#define ftrace_regs_return_value(fregs) \ + regs_return_value(ftrace_get_regs(fregs)) +#define ftrace_regs_set_return_value(fregs, ret) \ + regs_set_return_value(ftrace_get_regs(fregs), ret) +#define ftrace_override_function_with_return(fregs) \ + override_function_with_return(ftrace_get_regs(fregs)) +#define ftrace_regs_query_register_offset(name) \ + regs_query_register_offset(name) +#endif + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); @@ -427,9 +456,7 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi { return -ENODEV; } -#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ -#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* * This must be implemented by the architecture. * It is the way the ftrace direct_ops helper, when called @@ -443,9 +470,9 @@ static inline int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsi * the return from the trampoline jump to the direct caller * instead of going back to the function it just traced. 
*/ -static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) { } -#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifdef CONFIG_STACK_TRACER diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). @@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } diff --git a/include/linux/gpio.h b/include/linux/gpio.h index a370387fa406..346f60bbab30 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -98,9 +98,9 @@ int devm_gpio_request_one(struct device *dev, unsigned gpio, #else /* ! 
CONFIG_GPIOLIB */ +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/types.h> -#include <linux/bug.h> struct device; struct gpio_chip; diff --git a/include/linux/gpio/aspeed.h b/include/linux/gpio/aspeed.h index 1bfb3cdc86d0..9a547e66c8c4 100644 --- a/include/linux/gpio/aspeed.h +++ b/include/linux/gpio/aspeed.h @@ -1,6 +1,10 @@ #ifndef __GPIO_ASPEED_H #define __GPIO_ASPEED_H +#include <linux/types.h> + +struct gpio_desc; + struct aspeed_gpio_copro_ops { int (*request_access)(void *data); int (*release_access)(void *data); diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 36460ced060b..45da8f137fe5 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -581,27 +581,6 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, flags, label); } -static inline -struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, - const char *con_id, int index, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label) -{ - return devm_fwnode_gpiod_get_index(dev, child, con_id, index, - flags, label); -} - -static inline -struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev, - const char *con_id, - struct fwnode_handle *child, - enum gpiod_flags flags, - const char *label) -{ - return devm_fwnode_gpiod_get_index(dev, child, con_id, 0, flags, label); -} - #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_OF_GPIO) struct device_node; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6aeea1071b1b..44783fc16125 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -7,8 +7,8 @@ #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/lockdep.h> -#include <linux/pinctrl/pinctrl.h> #include <linux/pinctrl/pinconf-generic.h> +#include <linux/pinctrl/pinctrl.h> #include <linux/property.h> #include <linux/types.h> @@ -27,7 +27,7 @@ struct gpio_chip; union gpio_irq_fwspec { struct irq_fwspec fwspec; -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ msi_alloc_info_t msiinfo; #endif }; diff --git a/include/linux/gpio/gpio-reg.h b/include/linux/gpio/gpio-reg.h index 39b888c40b39..3913b6660ed1 100644 --- a/include/linux/gpio/gpio-reg.h +++ b/include/linux/gpio/gpio-reg.h @@ -2,9 +2,13 @@ #ifndef GPIO_REG_H #define GPIO_REG_H +#include <linux/types.h> + struct device; struct irq_domain; +struct gpio_chip; + struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg, int base, int num, const char *label, u32 direction, u32 def_out, const char *const *names, struct irq_domain *irqdom, const int *irqs); diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index 0b619eb7ae83..44e5f162973e 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -3,7 +3,6 @@ #define __LINUX_GPIO_MACHINE_H #include <linux/types.h> -#include <linux/list.h> enum gpio_lookup_flags { GPIO_ACTIVE_HIGH = (0 << 0), diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h new file mode 100644 index 000000000000..6c75c8bd44a0 --- /dev/null +++ b/include/linux/gpio/property.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0+ +#ifndef __LINUX_GPIO_PROPERTY_H +#define __LINUX_GPIO_PROPERTY_H + +#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */ +#include <linux/property.h> + +#define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \ + PROPERTY_ENTRY_REF(_name_, _chip_node_, _idx_, _flags_) + +#endif /* __LINUX_GPIO_PROPERTY_H */ diff 
--git a/include/linux/highmem.h b/include/linux/highmem.h index e9912da5441b..44242268f53b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -319,6 +319,32 @@ static inline void copy_user_highpage(struct page *to, struct page *from, #endif +#ifdef copy_mc_to_kernel +static inline int copy_mc_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + unsigned long ret; + char *vfrom, *vto; + + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); + ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); + if (!ret) + kmsan_unpoison_memory(page_address(to), PAGE_SIZE); + kunmap_local(vto); + kunmap_local(vfrom); + + return ret; +} +#else +static inline int copy_mc_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + copy_user_highpage(to, from, vaddr, vma); + return 0; +} +#endif + #ifndef __HAVE_ARCH_COPY_HIGHPAGE static inline void copy_highpage(struct page *to, struct page *from) diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index e230c7c46110..be3aedaa96dc 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -384,14 +384,14 @@ struct hisi_qp { static inline int q_num_set(const char *val, const struct kernel_param *kp, unsigned int device) { - struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, - device, NULL); + struct pci_dev *pdev; u32 n, q_num; int ret; if (!val) return -EINVAL; + pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL); if (!pdev) { q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2); pr_info("No device found currently, suppose queue number is %u\n", @@ -401,6 +401,8 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp, q_num = QM_QNUM_V1; else q_num = QM_QNUM_V2; + + pci_dev_put(pdev); } ret = kstrtou32(val, 10, &n); @@ -469,11 +471,11 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen); int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs); void hisi_qm_dev_err_init(struct hisi_qm *qm); void hisi_qm_dev_err_uninit(struct hisi_qm *qm); -int hisi_qm_diff_regs_init(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, int reg_len); -void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len); +int hisi_qm_regs_debugfs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len); +void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len); void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, - struct dfx_diff_registers *dregs, int regs_len); + struct dfx_diff_registers *dregs, u32 regs_len); pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev, pci_channel_state_t state); diff --git a/include/linux/host1x.h b/include/linux/host1x.h index cb2100d9b0ff..dc55d9d3b94f 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -469,11 +469,13 @@ struct host1x_memory_context { #ifdef CONFIG_IOMMU_API struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct device *dev, struct pid *pid); void host1x_memory_context_get(struct host1x_memory_context *cd); void host1x_memory_context_put(struct host1x_memory_context *cd); #else static inline struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct device *dev, struct pid *pid) { return NULL; diff --git a/include/linux/hpet.h b/include/linux/hpet.h index 8604564b985d..21e69eaf7a36 100644 --- a/include/linux/hpet.h +++ b/include/linux/hpet.h @@ -30,7 +30,7 @@ struct hpet { unsigned long 
_hpet_compare; } _u1; u64 hpet_fsb[2]; /* FSB route */ - } hpet_timers[1]; + } hpet_timers[]; }; #define hpet_mc _u0._hpet_mc diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8b4f93e84868..551834cd5299 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t; /* * For HugeTLB page, there are more metadata to save in the struct page. But * the head struct page cannot meet our needs, so we have to abuse other tail - * struct page to store the metadata. In order to avoid conflicts caused by - * subsequent use of more tail struct pages, we gather these discrete indexes - * of tail struct page here. + * struct page to store the metadata. */ -enum { - SUBPAGE_INDEX_SUBPOOL = 1, /* reuse page->private */ -#ifdef CONFIG_CGROUP_HUGETLB - SUBPAGE_INDEX_CGROUP, /* reuse page->private */ - SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */ - __MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD, -#endif -#ifdef CONFIG_MEMORY_FAILURE - SUBPAGE_INDEX_HWPOISON, -#endif - __NR_USED_SUBPAGE, -}; +#define __NR_USED_SUBPAGE 3 struct hugepage_subpool { spinlock_t lock; @@ -149,6 +136,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, unsigned long len); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); +struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, unsigned long *, long, unsigned int, @@ -181,10 +170,11 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); int isolate_hugetlb(struct page *page, struct list_head *list); -int get_hwpoison_huge_page(struct page *page, bool *hugetlb); -int get_huge_page_for_hwpoison(unsigned long pfn, int flags); +int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison); +int get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared); void putback_active_hugepage(struct page *page); -void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); +void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; @@ -209,17 +199,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write); -struct page *follow_huge_pd(struct vm_area_struct *vma, - unsigned long address, hugepd_t hpd, - int flags, int pdshift); -struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, - int flags); -struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int flags); -struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, - pgd_t *pgd, int flags); void hugetlb_vma_lock_read(struct vm_area_struct *vma); void hugetlb_vma_unlock_read(struct vm_area_struct *vma); @@ -272,6 +251,12 @@ static inline void adjust_range_if_pmd_sharing_possible( { } +static inline struct page 
*hugetlb_follow_page_mask(struct vm_area_struct *vma, + unsigned long address, unsigned int flags) +{ + BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ +} + static inline long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, @@ -282,12 +267,6 @@ static inline long follow_hugetlb_page(struct mm_struct *mm, return 0; } -static inline struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - static inline int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *dst_vma, @@ -320,31 +299,6 @@ static inline void hugetlb_show_meminfo_node(int nid) { } -static inline struct page *follow_huge_pd(struct vm_area_struct *vma, - unsigned long address, hugepd_t hpd, int flags, - int pdshift) -{ - return NULL; -} - -static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, - unsigned long address, int flags) -{ - return NULL; -} - -static inline struct page *follow_huge_pud(struct mm_struct *mm, - unsigned long address, pud_t *pud, int flags) -{ - return NULL; -} - -static inline struct page *follow_huge_pgd(struct mm_struct *mm, - unsigned long address, pgd_t *pgd, int flags) -{ - return NULL; -} - static inline int prepare_hugepage_range(struct file *file, unsigned long addr, unsigned long len) { @@ -425,12 +379,13 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list) return -EBUSY; } -static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) +static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) { return 0; } -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags) +static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { return 0; } @@ -439,8 +394,8 @@ static inline void putback_active_hugepage(struct page *page) { } -static inline void move_hugetlb_state(struct page *oldpage, - struct page *newpage, int reason) +static inline void move_hugetlb_state(struct folio *old_folio, + struct folio *new_folio, int reason) { } @@ -623,26 +578,50 @@ enum hugetlb_page_flags { */ #ifdef CONFIG_HUGETLB_PAGE #define TESTHPAGEFLAG(uname, flname) \ +static __always_inline \ +bool folio_test_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + return test_bit(HPG_##flname, private); \ + } \ static inline int HPage##uname(struct page *page) \ { return test_bit(HPG_##flname, &(page->private)); } #define SETHPAGEFLAG(uname, flname) \ +static __always_inline \ +void folio_set_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + set_bit(HPG_##flname, private); \ + } \ static inline void SetHPage##uname(struct page *page) \ { set_bit(HPG_##flname, &(page->private)); } #define CLEARHPAGEFLAG(uname, flname) \ +static __always_inline \ +void folio_clear_hugetlb_##flname(struct folio *folio) \ + { void *private = &folio->private; \ + clear_bit(HPG_##flname, private); \ + } \ static inline void ClearHPage##uname(struct page *page) \ { clear_bit(HPG_##flname, &(page->private)); } #else #define TESTHPAGEFLAG(uname, flname) \ +static inline bool \ +folio_test_hugetlb_##flname(struct folio *folio) \ + { return 0; } \ static inline int HPage##uname(struct page *page) \ { return 0; } #define SETHPAGEFLAG(uname, flname) \ +static inline void \ +folio_set_hugetlb_##flname(struct folio *folio) \ + { } \ 
static inline void SetHPage##uname(struct page *page) \ { } #define CLEARHPAGEFLAG(uname, flname) \ +static inline void \ +folio_clear_hugetlb_##flname(struct folio *folio) \ + { } \ static inline void ClearHPage##uname(struct page *page) \ { } #endif @@ -728,18 +707,29 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) +static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) +{ + return folio->_hugetlb_subpool; +} + /* - * hugetlb page subpool pointer located in hpage[1].private + * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool */ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) { - return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL); + return hugetlb_folio_subpool(page_folio(hpage)); +} + +static inline void hugetlb_set_folio_subpool(struct folio *folio, + struct hugepage_subpool *subpool) +{ + folio->_hugetlb_subpool = subpool; } static inline void hugetlb_set_page_subpool(struct page *hpage, struct hugepage_subpool *subpool) { - set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool); + hugetlb_set_folio_subpool(page_folio(hpage), subpool); } static inline struct hstate *hstate_file(struct file *f) @@ -823,10 +813,15 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, } #endif +static inline struct hstate *folio_hstate(struct folio *folio) +{ + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + return size_to_hstate(folio_size(folio)); +} + static inline struct hstate *page_hstate(struct page *page) { - VM_BUG_ON_PAGE(!PageHuge(page), page); - return size_to_hstate(page_size(page)); + return folio_hstate(page_folio(page)); } static inline unsigned hstate_index_to_shift(unsigned index) @@ -983,6 +978,11 @@ void hugetlb_unregister_node(struct node *node); #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; +static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) +{ + return NULL; +} + static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) { return NULL; @@ -1035,6 +1035,11 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma) return NULL; } +static inline struct hstate *folio_hstate(struct folio *folio) +{ + return NULL; +} + static inline struct hstate *page_hstate(struct page *page) { return NULL; diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 630cd255d0cf..f706626a8063 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -24,12 +24,10 @@ struct file_region; #ifdef CONFIG_CGROUP_HUGETLB /* * Minimum page order trackable by hugetlb cgroup. - * At least 4 pages are necessary for all the tracking information. - * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault - * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD]) - * is the reservation usage cgroup. + * At least 3 pages are necessary for all the tracking information. + * The second tail page contains all of the hugetlb-specific fields. 
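[Editor's aside: each TESTHPAGEFLAG/SETHPAGEFLAG/CLEARHPAGEFLAG invocation above now emits a folio accessor alongside the page one. Expanding, say, TESTHPAGEFLAG(Freed, freed) by hand gives approximately the following; this is an illustrative expansion, not literal preprocessor output:

static __always_inline bool folio_test_hugetlb_freed(struct folio *folio)
{
	void *private = &folio->private;

	return test_bit(HPG_freed, private);
}

static inline int HPageFreed(struct page *page)
{
	return test_bit(HPG_freed, &(page->private));
}
]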
*/ -#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1) +#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE) enum hugetlb_memory_event { HUGETLB_MAX, @@ -67,54 +65,50 @@ struct hugetlb_cgroup { }; static inline struct hugetlb_cgroup * -__hugetlb_cgroup_from_page(struct page *page, bool rsvd) +__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd) { - VM_BUG_ON_PAGE(!PageHuge(page), page); - - if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return NULL; if (rsvd) - return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD); + return folio->_hugetlb_cgroup_rsvd; else - return (void *)page_private(page + SUBPAGE_INDEX_CGROUP); + return folio->_hugetlb_cgroup; } -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) { - return __hugetlb_cgroup_from_page(page, false); + return __hugetlb_cgroup_from_folio(folio, false); } static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_rsvd(struct page *page) +hugetlb_cgroup_from_folio_rsvd(struct folio *folio) { - return __hugetlb_cgroup_from_page(page, true); + return __hugetlb_cgroup_from_folio(folio, true); } -static inline void __set_hugetlb_cgroup(struct page *page, +static inline void __set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg, bool rsvd) { - VM_BUG_ON_PAGE(!PageHuge(page), page); - - if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) + VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); + if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER) return; if (rsvd) - set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD, - (unsigned long)h_cg); + folio->_hugetlb_cgroup_rsvd = h_cg; else - set_page_private(page + SUBPAGE_INDEX_CGROUP, - (unsigned long)h_cg); + folio->_hugetlb_cgroup = h_cg; } -static inline void set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page, h_cg, false); + __set_hugetlb_cgroup(folio, h_cg, false); } -static inline void set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct folio *folio, struct hugetlb_cgroup *h_cg) { - __set_hugetlb_cgroup(page, h_cg, true); + __set_hugetlb_cgroup(folio, h_cg, true); } static inline bool hugetlb_cgroup_disabled(void) @@ -151,10 +145,10 @@ extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page); -extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page); -extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, - struct page *page); +extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio); +extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, + struct folio *folio); extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); @@ -170,8 +164,8 @@ extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, bool region_del); extern void hugetlb_cgroup_file_init(void) __init; -extern void hugetlb_cgroup_migrate(struct page *oldhpage, - struct page *newhpage); +extern void hugetlb_cgroup_migrate(struct folio *old_folio, + struct folio 
*new_folio); #else static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, @@ -181,29 +175,23 @@ static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, { } -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) -{ - return NULL; -} - -static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_resv(struct page *page) +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio) { return NULL; } static inline struct hugetlb_cgroup * -hugetlb_cgroup_from_page_rsvd(struct page *page) +hugetlb_cgroup_from_folio_rsvd(struct folio *folio) { return NULL; } -static inline void set_hugetlb_cgroup(struct page *page, +static inline void set_hugetlb_cgroup(struct folio *folio, struct hugetlb_cgroup *h_cg) { } -static inline void set_hugetlb_cgroup_rsvd(struct page *page, +static inline void set_hugetlb_cgroup_rsvd(struct folio *folio, struct hugetlb_cgroup *h_cg) { } @@ -253,14 +241,14 @@ hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, { } -static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, - struct page *page) +static inline void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, + struct folio *folio) { } -static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx, +static inline void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, - struct page *page) + struct folio *folio) { } static inline void hugetlb_cgroup_uncharge_cgroup(int idx, @@ -285,8 +273,8 @@ static inline void hugetlb_cgroup_file_init(void) { } -static inline void hugetlb_cgroup_migrate(struct page *oldhpage, - struct page *newhpage) +static inline void hugetlb_cgroup_migrate(struct folio *old_folio, + struct folio *new_folio) { } diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 77c2885c4c13..8a3115516a1b 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -34,7 +34,7 @@ * @priv: Private data, for use by the RNG driver. * @quality: Estimation of true entropy in RNG's bitstream * (in bits of entropy per 1024 bits of input; - * valid values: 1 to 1024, or 0 for unknown). + * valid values: 1 to 1024, or 0 for maximum). */ struct hwrng { const char *name; diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h index cb26d02f52f3..d896713359cd 100644 --- a/include/linux/hwmon-sysfs.h +++ b/include/linux/hwmon-sysfs.h @@ -8,6 +8,7 @@ #define _LINUX_HWMON_SYSFS_H #include <linux/device.h> +#include <linux/kstrtox.h> struct sensor_device_attribute{ struct device_attribute dev_attr; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 3b42264333ef..85f7c5a63aa6 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -969,7 +969,7 @@ struct vmbus_channel { * mechanism improves throughput by: * * A) Making the host more efficient - each time it wakes up, - * potentially it will process morev number of packets. The + * potentially it will process more number of packets. The * monitor latency allows a batch to build up. * B) By deferring the hypercall to signal, we will also minimize * the interrupts. 
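[Editor's aside: tying the hwrng quality note above to its use, the field is simply filled in at registration time. A hypothetical driver claiming roughly half a bit of entropy per bit of output might look like this sketch; demo_rng and its read callback are invented names:

static int demo_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
	/* fill buf with up to max bytes of random data, return byte count */
	return 0;
}

static struct hwrng demo_rng = {
	.name	 = "demo-rng",	/* hypothetical device */
	.read	 = demo_rng_read,
	.quality = 512,		/* ~512 bits of entropy per 1024 bits of input */
};

/* registered via devm_hwrng_register(dev, &demo_rng) */
]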
@@ -1341,6 +1341,8 @@ struct hv_ring_buffer_debug_info { int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, struct hv_ring_buffer_debug_info *debug_info); +bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel); + /* Vmbus interface */ #define vmbus_driver_register(driver) \ __vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f7c49bbdb8a1..d84e0e99f084 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -189,6 +189,7 @@ s32 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client, u8 *values); int i2c_get_device_id(const struct i2c_client *client, struct i2c_device_identity *id); +const struct i2c_device_id *i2c_client_get_device_id(const struct i2c_client *client); #endif /* I2C */ /** diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 8242e13e7b0b..1c997abe868c 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -287,12 +287,15 @@ static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, #define module_i3c_i2c_driver(__i3cdrv, __i2cdrv) \ module_driver(__i3cdrv, \ i3c_i2c_driver_register, \ - i3c_i2c_driver_unregister) + i3c_i2c_driver_unregister, \ + __i2cdrv) int i3c_device_do_priv_xfers(struct i3c_device *dev, struct i3c_priv_xfer *xfers, int nxfers); +int i3c_device_do_setdasa(struct i3c_device *dev); + void i3c_device_get_info(struct i3c_device *dev, struct i3c_device_info *info); struct i3c_ibi_payload { diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 79690938d9a2..80d6308dea06 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -338,6 +338,17 @@ struct ieee80211_qos_hdr { __le16 qos_ctrl; } __packed __aligned(2); +struct ieee80211_qos_hdr_4addr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + __le16 seq_ctrl; + u8 addr4[ETH_ALEN]; + __le16 qos_ctrl; +} __packed __aligned(2); + struct ieee80211_trigger { __le16 frame_control; __le16 duration; @@ -4060,16 +4071,21 @@ struct ieee80211_he_6ghz_capa { * @hdr: the frame * * The qos ctrl bytes come after the frame_control, duration, seq_num - * and 3 or 4 addresses of length ETH_ALEN. - * 3 addr: 2 + 2 + 2 + 3*6 = 24 - * 4 addr: 2 + 2 + 2 + 4*6 = 30 + * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose + * between struct ieee80211_qos_hdr_4addr and struct ieee80211_qos_hdr. 
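[Editor's aside: a typical caller of the helper documented above only wants the TID from the first QoS-control octet. The standard idiom, shown for context with a hypothetical hdr pointer:

	u8 *qc, tid;

	if (ieee80211_is_data_qos(hdr->frame_control)) {
		qc = ieee80211_get_qos_ctl(hdr);
		tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
	}
]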
*/ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) { - if (ieee80211_has_a4(hdr->frame_control)) - return (u8 *)hdr + 30; + union { + struct ieee80211_qos_hdr addr3; + struct ieee80211_qos_hdr_4addr addr4; + } *qos; + + qos = (void *)hdr; + if (ieee80211_has_a4(qos->addr3.frame_control)) + return (u8 *)&qos->addr4.qos_ctrl; else - return (u8 *)hdr + 24; + return (u8 *)&qos->addr3.qos_ctrl; } /** @@ -4573,18 +4589,17 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: - common += sizeof(struct ieee80211_mle_basic_common_info); - break; case IEEE80211_ML_CONTROL_TYPE_PREQ: - common += sizeof(struct ieee80211_mle_preq_common_info); + case IEEE80211_ML_CONTROL_TYPE_TDLS: + /* + * The length is the first octet pointed by mle->variable so no + * need to add anything + */ break; case IEEE80211_ML_CONTROL_TYPE_RECONF: if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) common += ETH_ALEN; return common; - case IEEE80211_ML_CONTROL_TYPE_TDLS: - common += sizeof(struct ieee80211_mle_tdls_common_info); - break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) common += ETH_ALEN; @@ -4594,7 +4609,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) return 0; } - return common + mle->variable[0]; + return sizeof(*mle) + common + mle->variable[0]; } /** @@ -4602,7 +4617,7 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) * @data: pointer to the element data * @len: length of the containing element */ -static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len) +static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u8 fixed = sizeof(*mle); @@ -4667,6 +4682,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, u8 len) enum ieee80211_mle_subelems { IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0, + IEEE80211_MLE_SUBELEM_FRAGMENT = 254, }; #define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f @@ -4685,6 +4701,46 @@ struct ieee80211_mle_per_sta_profile { u8 variable[]; } __packed; +/** + * ieee80211_mle_sta_prof_size_ok - validate multi-link element sta profile size + * @data: pointer to the sub element data + * @len: length of the containing sub element + */ +static inline bool ieee80211_mle_sta_prof_size_ok(const u8 *data, size_t len) +{ + const struct ieee80211_mle_per_sta_profile *prof = (const void *)data; + u16 control; + u8 fixed = sizeof(*prof); + u8 info_len = 1; + + if (len < fixed) + return false; + + control = le16_to_cpu(prof->control); + + if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT) + info_len += 6; + if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) + info_len += 2; + if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) + info_len += 8; + if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT) + info_len += 2; + if (control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT) + info_len += 1; + + if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE && + control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) { + if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) + info_len += 2; + else + info_len += 1; + } + + return prof->sta_info_len >= info_len && + fixed + prof->sta_info_len <= len; +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/include/linux/ieee802154.h 
b/include/linux/ieee802154.h index f1f9412b6ac6..0303eb84d596 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -276,6 +276,30 @@ enum { IEEE802154_SYSTEM_ERROR = 0xff, }; +/** + * enum ieee802154_filtering_level - Filtering levels applicable to a PHY + * + * @IEEE802154_FILTERING_NONE: No filtering at all, what is received is + * forwarded to the softMAC + * @IEEE802154_FILTERING_1_FCS: First filtering level, frames with an invalid + * FCS should be dropped + * @IEEE802154_FILTERING_2_PROMISCUOUS: Second filtering level, promiscuous + * mode as described in the spec, identical in terms of filtering to the + * level one on PHY side, but at the MAC level the frame should be + * forwarded to the upper layer directly + * @IEEE802154_FILTERING_3_SCAN: Third filtering level, scan related, where + * only beacons must be processed, all remaining traffic gets dropped + * @IEEE802154_FILTERING_4_FRAME_FIELDS: Fourth filtering level actually + * enforcing the validity of the content of the frame with various checks + */ +enum ieee802154_filtering_level { + IEEE802154_FILTERING_NONE, + IEEE802154_FILTERING_1_FCS, + IEEE802154_FILTERING_2_PROMISCUOUS, + IEEE802154_FILTERING_3_SCAN, + IEEE802154_FILTERING_4_FRAME_FIELDS, +}; + /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 #define IEEE802154_FCTL_ACKREQ 0x0020 diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index d62ef428e3aa..1668ac4d7adc 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -59,6 +59,7 @@ struct br_ip_list { #define BR_MRP_LOST_IN_CONT BIT(19) #define BR_TX_FWD_OFFLOAD BIT(20) #define BR_PORT_LOCKED BIT(21) +#define BR_PORT_MAB BIT(22) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index e00c4ee81ff7..6864b89ef868 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -76,7 +76,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define skb_vlan_tag_present(__skb) ((__skb)->vlan_present) +#define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #define skb_vlan_tag_get_cfi(__skb) (!!((__skb)->vlan_tci & VLAN_CFI_MASK)) @@ -471,7 +471,7 @@ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, */ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) { - skb->vlan_present = 0; + skb->vlan_all = 0; } /** @@ -483,9 +483,7 @@ static inline void __vlan_hwaccel_clear_tag(struct sk_buff *skb) */ static inline void __vlan_hwaccel_copy_tag(struct sk_buff *dst, const struct sk_buff *src) { - dst->vlan_present = src->vlan_present; - dst->vlan_proto = src->vlan_proto; - dst->vlan_tci = src->vlan_tci; + dst->vlan_all = src->vlan_all; } /* @@ -519,7 +517,6 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, { skb->vlan_proto = vlan_proto; skb->vlan_tci = vlan_tci; - skb->vlan_present = 1; } /** diff --git a/include/linux/ima.h b/include/linux/ima.h index 81708ca0ebc7..5a0b2a285a18 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -187,6 +187,15 @@ extern void ima_inode_post_setattr(struct user_namespace *mnt_userns, struct dentry *dentry); extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len); +extern int ima_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, 
const char *acl_name, + struct posix_acl *kacl); +static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return ima_inode_set_acl(mnt_userns, dentry, acl_name, NULL); +} extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name); #else static inline bool is_ima_appraise_enabled(void) @@ -208,11 +217,26 @@ static inline int ima_inode_setxattr(struct dentry *dentry, return 0; } +static inline int ima_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl) +{ + + return 0; +} + static inline int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name) { return 0; } + +static inline int ima_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} #endif /* CONFIG_IMA_APPRAISE */ #if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING) diff --git a/include/linux/init.h index 077d7f93b402..c5fe6d26f5b1 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -2,7 +2,9 @@ #ifndef _LINUX_INIT_H #define _LINUX_INIT_H +#include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/stringify.h> #include <linux/types.h> /* Built-in __init functions needn't be compiled with retpoline */ @@ -143,6 +145,7 @@ struct file_system_type; extern int do_one_initcall(initcall_t fn); extern char __initdata boot_command_line[]; extern char *saved_command_line; +extern unsigned int saved_command_line_len; extern unsigned int reset_devices; /* used by init/main.c */ diff --git a/include/linux/intel-svm.h index 207ef06ba3e1..f9a0d44f6fdb 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -13,17 +13,4 @@ #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) #define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) -/* - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only - * for access to kernel addresses. No IOTLB flushes are automatically done - * for kernel mappings; it is valid only for access to the kernel's static - * 1:1 mapping of physical memory — not to vmalloc or even module mappings. - * A future API addition may permit the use of such ranges, by means of an - * explicit IOTLB flush call (akin to the DMA API's unmap method). - * - * It is unlikely that we will ever hook into flush_tlb_kernel_range() to - * do such IOTLB flushes automatically. - */ -#define SVM_FLAG_SUPERVISOR_MODE BIT(0) - #endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/interval_tree.h index 288c26f50732..2b8026a39906 100644 --- a/include/linux/interval_tree.h +++ b/include/linux/interval_tree.h @@ -27,4 +27,62 @@ extern struct interval_tree_node * interval_tree_iter_next(struct interval_tree_node *node, unsigned long start, unsigned long last); +/** + * struct interval_tree_span_iter - Find used and unused spans. + * @start_hole: Start of an interval for a hole when is_hole == 1 + * @last_hole: Inclusive end of an interval for a hole when is_hole == 1 + * @start_used: Start of a used interval when is_hole == 0 + * @last_used: Inclusive end of a used interval when is_hole == 0 + * @is_hole: 0 == used, 1 == is_hole, -1 == done iteration + * + * This iterator travels over spans in an interval tree. It does not return + * nodes but classifies each span as either a hole, where no nodes intersect, or + * a used span, which is fully covered by nodes.
Each iteration step toggles between + * hole and used until the entire range is covered. The returned spans always + * fully cover the requested range. + * + * The iterator is greedy; it always returns the largest hole or used span + * possible, consolidating all consecutive nodes. + * + * Use interval_tree_span_iter_done() to detect end of iteration. + */ +struct interval_tree_span_iter { + /* private: not for use by the caller */ + struct interval_tree_node *nodes[2]; + unsigned long first_index; + unsigned long last_index; + + /* public: */ + union { + unsigned long start_hole; + unsigned long start_used; + }; + union { + unsigned long last_hole; + unsigned long last_used; + }; + int is_hole; +}; + +void interval_tree_span_iter_first(struct interval_tree_span_iter *state, + struct rb_root_cached *itree, + unsigned long first_index, + unsigned long last_index); +void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long new_index); +void interval_tree_span_iter_next(struct interval_tree_span_iter *state); + +static inline bool +interval_tree_span_iter_done(struct interval_tree_span_iter *state) +{ + return state->is_hole == -1; +} + +#define interval_tree_for_each_span(span, itree, first_index, last_index) \ + for (interval_tree_span_iter_first(span, itree, \ + first_index, last_index); \ + !interval_tree_span_iter_done(span); \ + interval_tree_span_iter_next(span)) + #endif /* _LINUX_INTERVAL_TREE_H */
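[Editor's note] The span iterator is intended to replace open-coded gap searches over an interval tree. A minimal sketch that scans a whole tree and reports only the unused gaps; the tree pointer and the pr_info() reporting are assumptions of the example:

/* Sketch: report every hole in an interval tree over the full index space. */
static void example_dump_holes(struct rb_root_cached *itree)
{
	struct interval_tree_span_iter span;

	interval_tree_for_each_span(&span, itree, 0, ULONG_MAX) {
		if (!span.is_hole)
			continue;
		pr_info("hole: [%lu, %lu]\n", span.start_hole, span.last_hole);
	}
}

diff --git a/include/linux/io-mapping.h index 66a774d2710e..09d4f17c8d3b 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -213,7 +213,7 @@ io_mapping_free(struct io_mapping *iomap) kfree(iomap); } -#endif /* _LINUX_IO_MAPPING_H */ - int io_mapping_map_user(struct io_mapping *iomap, struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size); + +#endif /* _LINUX_IO_MAPPING_H */ diff --git a/include/linux/io_uring.h index 43bc8a2edccf..934e5dd4ccc0 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -9,13 +9,17 @@ enum io_uring_cmd_flags { IO_URING_F_COMPLETE_DEFER = 1, IO_URING_F_UNLOCKED = 2, + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 4, + /* executed by io-wq */ + IO_URING_F_IOWQ = 8, /* int's last bit, sign checks are usually faster than a bit test */ IO_URING_F_NONBLOCK = INT_MIN, /* ctx state flags, for URING_CMD */ - IO_URING_F_SQE128 = 4, - IO_URING_F_CQE32 = 8, - IO_URING_F_IOPOLL = 16, + IO_URING_F_SQE128 = (1 << 8), + IO_URING_F_CQE32 = (1 << 9), + IO_URING_F_IOPOLL = (1 << 10), }; struct io_uring_cmd { diff --git a/include/linux/io_uring_types.h index f5b687a787a3..dcd8a563ab52 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -174,7 +174,9 @@ struct io_submit_state { bool plug_started; bool need_plug; unsigned short submit_nr; + unsigned int cqes_count; struct blk_plug plug; + struct io_uring_cqe cqes[16]; }; struct io_ev_fd { @@ -206,6 +208,8 @@ struct io_ring_ctx { unsigned int drain_disabled: 1; unsigned int has_evfd: 1; unsigned int syscall_iopoll: 1; + /* all CQEs should be posted only by the submitter task */ + unsigned int task_complete: 1; } ____cacheline_aligned_in_smp; /* submission data */ @@ -324,6 +328,7 @@ struct io_ring_ctx { struct io_rsrc_data *buf_data; struct delayed_work rsrc_put_work; + struct callback_head rsrc_put_tw; struct llist_head rsrc_put_llist;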
struct list_head rsrc_ref_list; spinlock_t rsrc_ref_lock; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 238a03087e17..0983dfc9a203 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -49,26 +49,35 @@ struct vm_fault; * * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of * buffer heads for this mapping. + * + * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent + * rather than a file data extent. */ -#define IOMAP_F_NEW 0x01 -#define IOMAP_F_DIRTY 0x02 -#define IOMAP_F_SHARED 0x04 -#define IOMAP_F_MERGED 0x08 -#define IOMAP_F_BUFFER_HEAD 0x10 -#define IOMAP_F_ZONE_APPEND 0x20 +#define IOMAP_F_NEW (1U << 0) +#define IOMAP_F_DIRTY (1U << 1) +#define IOMAP_F_SHARED (1U << 2) +#define IOMAP_F_MERGED (1U << 3) +#define IOMAP_F_BUFFER_HEAD (1U << 4) +#define IOMAP_F_ZONE_APPEND (1U << 5) +#define IOMAP_F_XATTR (1U << 6) /* * Flags set by the core iomap code during operations: * * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size * has changed as the result of this write operation. + * + * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file + * range it covers needs to be remapped by the high level before the operation + * can proceed. */ -#define IOMAP_F_SIZE_CHANGED 0x100 +#define IOMAP_F_SIZE_CHANGED (1U << 8) +#define IOMAP_F_STALE (1U << 9) /* * Flags from 0x1000 up are for file system specific usage: */ -#define IOMAP_F_PRIVATE 0x1000 +#define IOMAP_F_PRIVATE (1U << 12) /* @@ -89,6 +98,7 @@ struct iomap { void *inline_data; void *private; /* filesystem private */ const struct iomap_page_ops *page_ops; + u64 validity_cookie; /* used with .iomap_valid() */ }; static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) @@ -128,6 +138,23 @@ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, struct page *page); + + /* + * Check that the cached iomap still maps correctly to the filesystem's + * internal extent map. FS internal extent maps can change while iomap + * is iterating a cached iomap, so this hook allows iomap to detect that + * the iomap needs to be refreshed during a long running write + * operation. + * + * The filesystem can store internal state (e.g. a sequence number) in + * iomap->validity_cookie when the iomap is first mapped to be able to + * detect changes between mapping time and whenever .iomap_valid() is + * called. + * + * This is called with the folio over the specified file position held + * locked by the iomap code. 
+ */ + bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); }; /* @@ -226,6 +253,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)); + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
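[Editor's note] The validity_cookie plus ->iomap_valid() pair reduces revalidation to a sequence compare. A sketch of how a filesystem might wire this up, assuming a hypothetical per-inode extent-map sequence counter i_map_seq that the filesystem bumps on every extent change; EXAMPLE_I() is a made-up inode-container helper:

/* Sketch: sample the hypothetical i_map_seq in ->iomap_begin():
 *	iomap->validity_cookie = READ_ONCE(EXAMPLE_I(inode)->i_map_seq);
 * ...then compare it when iomap asks whether the mapping is still valid.
 */
static bool example_iomap_valid(struct inode *inode, const struct iomap *iomap)
{
	return iomap->validity_cookie ==
	       READ_ONCE(EXAMPLE_I(inode)->i_map_seq);
}

static const struct iomap_page_ops example_page_ops = {
	.iomap_valid = example_iomap_valid,
};

diff --git a/include/linux/iommu.h index 3c9da1f8979e..46e1347bfa22 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -64,6 +64,8 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ #define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */ +#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */ + /* * These are the possible domain-types * @@ -77,6 +79,8 @@ struct iommu_domain_geometry { * certain optimizations for these domains * IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB * invalidation. + * IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses + * represented by mm_struct's. */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) @@ -86,15 +90,27 @@ struct iommu_domain_geometry { #define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \ __IOMMU_DOMAIN_DMA_API | \ __IOMMU_DOMAIN_DMA_FQ) +#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA) struct iommu_domain { unsigned type; const struct iommu_domain_ops *ops; unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ - iommu_fault_handler_t handler; - void *handler_token; struct iommu_domain_geometry geometry; struct iommu_dma_cookie *iova_cookie; + enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault, + void *data); + void *fault_data; + union { + struct { + iommu_fault_handler_t handler; + void *handler_token; + }; + struct { /* IOMMU_DOMAIN_SVA */ + struct mm_struct *mm; + int users; + }; + }; }; static inline bool iommu_is_dma_domain(struct iommu_domain *domain) @@ -108,6 +124,11 @@ enum iommu_cap { IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for DMA protection and we should too */ + /* + * Per-device flag indicating if enforce_cache_coherency() will work on + * this device. + */ + IOMMU_CAP_ENFORCE_CACHE_COHERENCY, }; /* These are the possible reserved region types */ @@ -214,15 +235,15 @@ struct iommu_iotlb_gather { * driver init to device driver init (default no) * @dev_enable/disable_feat: per device entries to enable/disable * iommu specific features. - * @sva_bind: Bind process address space to device - * @sva_unbind: Unbind process address space from device - * @sva_get_pasid: Get PASID associated to a SVA handle * @page_response: handle page request response * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains + * @remove_dev_pasid: Remove any translation configurations of a specific + * pasid, so that any DMA transactions with this pasid + * will be blocked by the hardware.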
* @pgsize_bitmap: bitmap of all possible supported page sizes * @owner: Driver module providing these ops */ @@ -247,16 +268,12 @@ struct iommu_ops { int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm, - void *drvdata); - void (*sva_unbind)(struct iommu_sva *handle); - u32 (*sva_get_pasid)(struct iommu_sva *handle); - int (*page_response)(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); + void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid); const struct iommu_domain_ops *default_domain_ops; unsigned long pgsize_bitmap; @@ -266,7 +283,20 @@ struct iommu_ops { /** * struct iommu_domain_ops - domain specific operations * @attach_dev: attach an iommu domain to a device + * Return: + * * 0 - success + * * EINVAL - can indicate that device and domain are incompatible due to + * some previous configuration of the domain, in which case the + * driver shouldn't log an error, since it is legitimate for a + * caller to test reuse of existing domains. Otherwise, it may + * still represent some other fundamental problem + * * ENOMEM - out of memory + * * ENOSPC - non-ENOMEM type of resource allocation failures + * * EBUSY - device is attached to a domain and cannot be changed + * * ENODEV - device specific errors, not able to be attached + * * <others> - treated as ENODEV by the caller. Use is discouraged * @detach_dev: detach an iommu domain from a device + * @set_dev_pasid: set an iommu domain to a pasid of device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to * an iommu domain. @@ -287,6 +317,8 @@ struct iommu_ops { struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid); int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); @@ -322,12 +354,14 @@ struct iommu_domain_ops { * @list: Used by the iommu-core to keep a list of registered iommus * @ops: iommu-ops for talking to this iommu * @dev: struct device for sysfs handling + * @max_pasids: number of supported PASIDs */ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; struct device *dev; + u32 max_pasids; }; /** @@ -366,6 +400,7 @@ struct iommu_fault_param { * @fwspec: IOMMU fwspec data * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data + * @max_pasids: number of PASIDs this device can consume * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. 
* struct iommu_group *iommu_group; @@ -377,6 +412,7 @@ struct dev_iommu { struct iommu_fwspec *fwspec; struct iommu_device *iommu_dev; void *priv; + u32 max_pasids; }; int iommu_device_register(struct iommu_device *iommu, @@ -605,6 +641,10 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); * @flags: IOMMU_FWSPEC_* flags * @num_ids: number of associated device IDs * @ids: IDs which this device may present to the IOMMU + * + * Note that the IDs (and any other information, really) stored in this structure should be + * considered private to the IOMMU device driver and are not to be used directly by IOMMU + * consumers. */ struct iommu_fwspec { const struct iommu_ops *ops; @@ -622,6 +662,7 @@ struct iommu_fwspec { */ struct iommu_sva { struct device *dev; + struct iommu_domain *domain; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, @@ -663,12 +704,6 @@ void iommu_release_device(struct device *dev); int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); -struct iommu_sva *iommu_sva_bind_device(struct device *dev, - struct mm_struct *mm, - void *drvdata); -void iommu_sva_unbind_device(struct iommu_sva *handle); -u32 iommu_sva_get_pasid(struct iommu_sva *handle); - int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); @@ -676,6 +711,18 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner); void iommu_group_release_dma_owner(struct iommu_group *group); bool iommu_group_dma_owner_claimed(struct iommu_group *group); +int iommu_device_claim_dma_owner(struct device *dev, void *owner); +void iommu_device_release_dma_owner(struct device *dev); + +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); +int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +void iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid); +struct iommu_domain * +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, + unsigned int type); #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; @@ -995,21 +1042,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) return -ENODEV; } -static inline struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - return NULL; -} - -static inline void iommu_sva_unbind_device(struct iommu_sva *handle) -{ -} - -static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) -{ - return IOMMU_PASID_INVALID; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; @@ -1038,6 +1070,39 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group) { return false; } + +static inline void iommu_device_release_dma_owner(struct device *dev) +{ +} + +static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) +{ + return -ENODEV; +} + +static inline struct iommu_domain * +iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} + +static inline int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + return -ENODEV; +} + +static inline void iommu_detach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ +} + +static inline struct iommu_domain * +iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t 
pasid, + unsigned int type) +{ + return NULL; +} #endif /* CONFIG_IOMMU_API */ /** @@ -1099,4 +1164,47 @@ static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_m #endif /* CONFIG_IOMMU_DMA */ +/* + * Newer generations of Tegra SoCs require devices' stream IDs to be directly programmed into + * some registers. These are always paired with a Tegra SMMU or ARM SMMU, for which the contents + * of the struct iommu_fwspec are known. Use this helper to formalize access to these internals. + */ +#define TEGRA_STREAM_ID_BYPASS 0x7f + +static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream_id) +{ +#ifdef CONFIG_IOMMU_API + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (fwspec && fwspec->num_ids == 1) { + *stream_id = fwspec->ids[0] & 0xffff; + return true; + } +#endif + + return false; +} + +#ifdef CONFIG_IOMMU_SVA +struct iommu_sva *iommu_sva_bind_device(struct device *dev, + struct mm_struct *mm); +void iommu_sva_unbind_device(struct iommu_sva *handle); +u32 iommu_sva_get_pasid(struct iommu_sva *handle); +#else +static inline struct iommu_sva * +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) +{ + return NULL; +} + +static inline void iommu_sva_unbind_device(struct iommu_sva *handle) +{ +} + +static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle) +{ + return IOMMU_PASID_INVALID; +} +#endif /* CONFIG_IOMMU_SVA */ + #endif /* __LINUX_IOMMU_H */
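[Editor's note] With the ops-based sva_bind/sva_unbind callbacks removed, drivers call these CONFIG_IOMMU_SVA entry points directly. A sketch of the usual flow, binding the current process and programming the returned PASID into the device; the device-side programming is left as a comment and error handling is minimal:

/* Sketch: share current->mm with a device for SVA-capable DMA. */
static int example_enable_sva(struct device *dev)
{
	struct iommu_sva *handle;
	u32 pasid;

	handle = iommu_sva_bind_device(dev, current->mm);
	if (IS_ERR_OR_NULL(handle))
		return handle ? PTR_ERR(handle) : -ENODEV;

	pasid = iommu_sva_get_pasid(handle);
	if (pasid == IOMMU_PASID_INVALID) {
		iommu_sva_unbind_device(handle);
		return -ENODEV;
	}

	/* ... program 'pasid' into the device, run DMA, then call
	 * iommu_sva_unbind_device(handle) when done ...
	 */
	return 0;
}

diff --git a/include/linux/iommufd.h new file mode 100644 index 000000000000..650d45629647 --- /dev/null +++ b/include/linux/iommufd.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Intel Corporation + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __LINUX_IOMMUFD_H +#define __LINUX_IOMMUFD_H + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/err.h> + +struct device; +struct iommufd_device; +struct page; +struct iommufd_ctx; +struct iommufd_access; +struct file; + +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, + struct device *dev, u32 *id); +void iommufd_device_unbind(struct iommufd_device *idev); + +int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); +void iommufd_device_detach(struct iommufd_device *idev); + +struct iommufd_access_ops { + u8 needs_pin_pages : 1; + void (*unmap)(void *data, unsigned long iova, unsigned long length); +}; + +enum { + IOMMUFD_ACCESS_RW_READ = 0, + IOMMUFD_ACCESS_RW_WRITE = 1 << 0, + /* Set if the caller is in a kthread; rw will then use kthread_use_mm() */ + IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1, + + /* Only for use by selftest */ + __IOMMUFD_ACCESS_RW_SLOW_PATH = 1 << 2, +}; + +struct iommufd_access * +iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id, + const struct iommufd_access_ops *ops, void *data); +void iommufd_access_destroy(struct iommufd_access *access); + +void iommufd_ctx_get(struct iommufd_ctx *ictx); + +#if IS_ENABLED(CONFIG_IOMMUFD) +struct iommufd_ctx *iommufd_ctx_from_file(struct file *file); +void iommufd_ctx_put(struct iommufd_ctx *ictx); + +int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, + unsigned long length, struct page **out_pages, + unsigned int flags); +void iommufd_access_unpin_pages(struct iommufd_access *access, + unsigned long iova, unsigned long length); +int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, + void *data, size_t len, unsigned int flags); +int iommufd_vfio_compat_ioas_id(struct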
iommufd_ctx *ictx, u32 *out_ioas_id); +#else /* !CONFIG_IOMMUFD */ +static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void iommufd_ctx_put(struct iommufd_ctx *ictx) +{ +} + +static inline int iommufd_access_pin_pages(struct iommufd_access *access, + unsigned long iova, + unsigned long length, + struct page **out_pages, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline void iommufd_access_unpin_pages(struct iommufd_access *access, + unsigned long iova, + unsigned long length) +{ +} + +static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, + void *data, size_t len, unsigned int flags) +{ + return -EOPNOTSUPP; +} + +static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, + u32 *out_ioas_id) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_IOMMUFD */ +#endif
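[Editor's note] For in-kernel consumers the access object is the supported way to reach into an IOAS. A sketch of creating an access and pinning one page for write; the ictx, ioas_id and the quiesce logic in the unmap callback are assumptions of the example:

static void example_unmap(void *data, unsigned long iova,
			  unsigned long length)
{
	/* Hypothetical: stop device DMA touching [iova, iova + length) */
}

static const struct iommufd_access_ops example_access_ops = {
	.needs_pin_pages = 1,
	.unmap = example_unmap,
};

/* Sketch: pin a single page of an IOAS for writable access. */
static int example_pin_one(struct iommufd_ctx *ictx, u32 ioas_id,
			   unsigned long iova, struct page **page)
{
	struct iommufd_access *access;
	int rc;

	access = iommufd_access_create(ictx, ioas_id, &example_access_ops,
				       NULL);
	if (IS_ERR(access))
		return PTR_ERR(access);

	rc = iommufd_access_pin_pages(access, iova, PAGE_SIZE, page,
				      IOMMUFD_ACCESS_RW_WRITE);
	if (rc)
		iommufd_access_destroy(access);
	return rc;
}

diff --git a/include/linux/ioport.h index 27642ca15d93..4ae3c541ea6f 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -318,6 +318,8 @@ extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); extern bool iomem_is_exclusive(u64 addr); +extern bool resource_is_exclusive(struct resource *resource, u64 addr, + resource_size_t size); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, diff --git a/include/linux/irqdomain.h index 00d577f90883..a372086750ca 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -31,6 +31,7 @@ #define _LINUX_IRQDOMAIN_H #include <linux/types.h> +#include <linux/irqdomain_defs.h> #include <linux/irqhandler.h> #include <linux/of.h> #include <linux/mutex.h> @@ -45,6 +46,7 @@ struct irq_desc; struct cpumask; struct seq_file; struct irq_affinity_desc; +struct msi_parent_ops; #define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16 @@ -68,27 +70,6 @@ struct irq_fwspec { void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec); -/* - * Should several domains have the same device node, but serve - * different purposes (for example one domain is for PCI/MSI, and the - * other for wired IRQs), they can be distinguished using a - * bus-specific token. Most domains are expected to only carry - * DOMAIN_BUS_ANY. - */ -enum irq_domain_bus_token { - DOMAIN_BUS_ANY = 0, - DOMAIN_BUS_WIRED, - DOMAIN_BUS_GENERIC_MSI, - DOMAIN_BUS_PCI_MSI, - DOMAIN_BUS_PLATFORM_MSI, - DOMAIN_BUS_NEXUS, - DOMAIN_BUS_IPI, - DOMAIN_BUS_FSL_MC_MSI, - DOMAIN_BUS_TI_SCI_INTA_MSI, - DOMAIN_BUS_WAKEUP, - DOMAIN_BUS_VMD_MSI, -}; - /** * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns @@ -137,53 +118,61 @@ struct irq_domain_chip_generic; /** * struct irq_domain - Hardware interrupt number translation object - * @link: Element in global irq_domain list. - * @name: Name of interrupt domain - * @ops: pointer to irq_domain methods - * @host_data: private data pointer for use by owner. Not touched by irq_domain - * core code.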
+ * @flags: Per irq_domain flags + * @mapcount: The number of mapped interrupts * - * Optional elements - * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy - * to swap it for the of_node via the irq_domain_get_of_node accessor - * @gc: Pointer to a list of generic chips. There is a helper function for - * setting up one or more generic chips for interrupt controllers - * drivers using the generic chip library which uses this pointer. - * @dev: Pointer to a device that the domain represent, and that will be - * used for power management purposes. - * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * Optional elements: + * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy + * to swap it for the of_node via the irq_domain_get_of_node accessor + * @gc: Pointer to a list of generic chips. There is a helper function for + * setting up one or more generic chips for interrupt controllers + * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to the device which instantiated the irqdomain + * With per device irq domains this is not necessarily the same + * as @pm_dev. + * @pm_dev: Pointer to a device that can be utilized for power management + * purposes related to the irq domain. + * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init * - * Revmap data, used internally by irq_domain - * @revmap_size: Size of the linear map table @revmap[] - * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map - * @revmap_mutex: Lock for the revmap - * @revmap: Linear table of irq_data pointers + * Revmap data, used internally by the irq domain code: + * @revmap_size: Size of the linear map table @revmap[] + * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map + * @revmap_mutex: Lock for the revmap + * @revmap: Linear table of irq_data pointers */ struct irq_domain { - struct list_head link; - const char *name; - const struct irq_domain_ops *ops; - void *host_data; - unsigned int flags; - unsigned int mapcount; + struct list_head link; + const char *name; + const struct irq_domain_ops *ops; + void *host_data; + unsigned int flags; + unsigned int mapcount; /* Optional data */ - struct fwnode_handle *fwnode; - enum irq_domain_bus_token bus_token; - struct irq_domain_chip_generic *gc; - struct device *dev; + struct fwnode_handle *fwnode; + enum irq_domain_bus_token bus_token; + struct irq_domain_chip_generic *gc; + struct device *dev; + struct device *pm_dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - struct irq_domain *parent; + struct irq_domain *parent; +#endif +#ifdef CONFIG_GENERIC_MSI_IRQ + const struct msi_parent_ops *msi_parent_ops; #endif /* reverse map data. The linear map gets appended to the irq_domain */ - irq_hw_number_t hwirq_max; - unsigned int revmap_size; - struct radix_tree_root revmap_tree; - struct mutex revmap_mutex; - struct irq_data __rcu *revmap[]; + irq_hw_number_t hwirq_max; + unsigned int revmap_size; + struct radix_tree_root revmap_tree; + struct mutex revmap_mutex; + struct irq_data __rcu *revmap[]; }; /* Irq domain flags */ @@ -206,15 +195,14 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), - /* - * Quirk to handle MSI implementations which do not provide - * masking. Currently known to affect x86, but partially - * handled in core code. 
- */ - IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6), - /* Irq domain doesn't translate anything */ - IRQ_DOMAIN_FLAG_NO_MAP = (1 << 7), + IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), + + /* Irq domain is an MSI parent domain */ + IRQ_DOMAIN_FLAG_MSI_PARENT = (1 << 8), + + /* Irq domain is an MSI device domain */ + IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9), /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved @@ -233,7 +221,7 @@ static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) - d->dev = dev; + d->pm_dev = dev; } #ifdef CONFIG_IRQ_DOMAIN @@ -578,6 +566,16 @@ static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); +static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; +} + +static inline bool irq_domain_is_msi_device(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; +} + #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) @@ -623,6 +621,17 @@ irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) { return false; } + +static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) +{ + return false; +} + +static inline bool irq_domain_is_msi_device(struct irq_domain *domain) +{ + return false; +} + #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN */ diff --git a/include/linux/irqdomain_defs.h new file mode 100644 index 000000000000..c29921fd8cd1 --- /dev/null +++ b/include/linux/irqdomain_defs.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IRQDOMAIN_DEFS_H +#define _LINUX_IRQDOMAIN_DEFS_H + +/* + * Should several domains have the same device node, but serve + * different purposes (for example one domain is for PCI/MSI, and the + * other for wired IRQs), they can be distinguished using a + * bus-specific token. Most domains are expected to only carry + * DOMAIN_BUS_ANY.
+ */ +enum irq_domain_bus_token { + DOMAIN_BUS_ANY = 0, + DOMAIN_BUS_WIRED, + DOMAIN_BUS_GENERIC_MSI, + DOMAIN_BUS_PCI_MSI, + DOMAIN_BUS_PLATFORM_MSI, + DOMAIN_BUS_NEXUS, + DOMAIN_BUS_IPI, + DOMAIN_BUS_FSL_MC_MSI, + DOMAIN_BUS_TI_SCI_INTA_MSI, + DOMAIN_BUS_WAKEUP, + DOMAIN_BUS_VMD_MSI, + DOMAIN_BUS_PCI_DEVICE_MSI, + DOMAIN_BUS_PCI_DEVICE_MSIX, + DOMAIN_BUS_DMAR, + DOMAIN_BUS_AMDVI, + DOMAIN_BUS_PCI_DEVICE_IMS, +}; + +#endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index bd4c066ad39b..d426c7ad92bf 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -3,10 +3,10 @@ #define _LINUX_IRQRETURN_H /** - * enum irqreturn - * @IRQ_NONE interrupt was not from this device or was not handled - * @IRQ_HANDLED interrupt was handled by this device - * @IRQ_WAKE_THREAD handler requests to wake the handler thread + * enum irqreturn - irqreturn type values + * @IRQ_NONE: interrupt was not from this device or was not handled + * @IRQ_HANDLED: interrupt was handled by this device + * @IRQ_WAKE_THREAD: handler requests to wake the handler thread */ enum irqreturn { IRQ_NONE = (0 << 0), diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0b7242370b56..2170e0cc279d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1662,7 +1662,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid); int jbd2_fc_end_commit(journal_t *journal); int jbd2_fc_end_commit_fallback(journal_t *journal); int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out); -int jbd2_submit_inode_data(struct jbd2_inode *jinode); +int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 570831ca9951..4e968ebadce6 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -224,9 +224,10 @@ extern bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type); extern void arch_jump_label_transform_apply(void); extern int jump_label_text_reserved(void *start, void *end); -extern void static_key_slow_inc(struct static_key *key); +extern bool static_key_slow_inc(struct static_key *key); +extern bool static_key_fast_inc_not_disabled(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); -extern void static_key_slow_inc_cpuslocked(struct static_key *key); +extern bool static_key_slow_inc_cpuslocked(struct static_key *key); extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); @@ -278,11 +279,23 @@ static __always_inline bool static_key_true(struct static_key *key) return false; } -static inline void static_key_slow_inc(struct static_key *key) +static inline bool static_key_fast_inc_not_disabled(struct static_key *key) { + int v; + STATIC_KEY_CHECK_USE(key); - atomic_inc(&key->enabled); + /* + * Prevent key->enabled getting negative to follow the same semantics + * as for CONFIG_JUMP_LABEL=y, see kernel/jump_label.c comment. 
+ */ + v = atomic_read(&key->enabled); + do { + if (v < 0 || (v + 1) < 0) + return false; + } while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1))); + return true; } +#define static_key_slow_inc(key) static_key_fast_inc_not_disabled(key) static inline void static_key_slow_dec(struct static_key *key) { diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 649faac31ddb..0065209cc004 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -66,9 +66,12 @@ static inline void *dereference_symbol_descriptor(void *ptr) } #ifdef CONFIG_KALLSYMS +unsigned long kallsyms_sym_address(int idx); int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); +int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long), + const char *name, void *data); /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); @@ -168,6 +171,12 @@ static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct { return -EOPNOTSUPP; } + +static inline int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long), + const char *name, void *data) +{ + return -EOPNOTSUPP; +} #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d811b3d7d2a1..96c9d56e5510 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -302,7 +302,7 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} #ifdef CONFIG_KASAN_GENERIC -size_t kasan_metadata_size(struct kmem_cache *cache); +size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, slab_flags_t *flags); @@ -315,7 +315,8 @@ void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ /* Tag-based KASAN modes do not use per-object metadata. */ -static inline size_t kasan_metadata_size(struct kmem_cache *cache) +static inline size_t kasan_metadata_size(struct kmem_cache *cache, + bool in_object) { return 0; } diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 55dc338f6bcd..ee04256f28af 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -56,7 +56,7 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary * work around for kcov's lack of nested remote coverage sections support in - * task context. Adding suport for nested sections is tracked in: + * task context. 
Adding support for nested sections is tracked in: * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 41a686996aaa..5dd4343c1bbe 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -17,6 +17,7 @@ #include <linux/crash_core.h> #include <asm/io.h> +#include <linux/range.h> #include <uapi/linux/kexec.h> #include <linux/verification.h> @@ -240,14 +241,10 @@ static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 -struct crash_mem_range { - u64 start, end; -}; - struct crash_mem { unsigned int max_nr_ranges; unsigned int nr_ranges; - struct crash_mem_range ranges[]; + struct range ranges[]; }; extern int crash_exclude_mem_range(struct crash_mem *mem, diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 70162d707caf..f68865e19b0b 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -15,6 +15,7 @@ extern void __khugepaged_exit(struct mm_struct *mm); extern void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); +extern bool current_is_khugepaged(void); #ifdef CONFIG_SHMEM extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd); @@ -57,6 +58,11 @@ static inline int collapse_pte_mapped_thp(struct mm_struct *mm, static inline void khugepaged_min_free_kbytes_update(void) { } + +static inline bool current_is_khugepaged(void) +{ + return false; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_KHUGEPAGED_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f16c4689322b..4f26b244f6d0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -427,7 +427,7 @@ static __always_inline void guest_context_enter_irqoff(void) */ if (!context_tracking_guest_enter()) { instrumentation_begin(); - rcu_virt_note_context_switch(smp_processor_id()); + rcu_virt_note_context_switch(); instrumentation_end(); } } @@ -794,6 +794,7 @@ struct kvm { struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; + bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool dirty_ring_with_bitmap; diff --git a/include/linux/libata.h b/include/linux/libata.h index fe990176e6ee..c9149ebe7423 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1043,6 +1043,11 @@ static inline int ata_port_is_dummy(struct ata_port *ap) return ap->ops == &ata_dummy_port_ops; } +static inline bool ata_port_is_frozen(const struct ata_port *ap) +{ + return ap->pflags & ATA_PFLAG_FROZEN; +} + extern int ata_std_prereset(struct ata_link *link, unsigned long deadline); extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)); @@ -1913,8 +1918,6 @@ extern void ata_sff_dev_select(struct ata_port *ap, unsigned int device); extern u8 ata_sff_check_status(struct ata_port *ap); extern void ata_sff_pause(struct ata_port *ap); extern void ata_sff_dma_pause(struct ata_port *ap); -extern int ata_sff_busy_sleep(struct ata_port *ap, - unsigned long timeout_pat, unsigned long timeout); extern int ata_sff_wait_ready(struct ata_link *link, unsigned long deadline); extern void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf); extern void ata_sff_tf_read(struct ata_port *ap, struct ata_taskfile *tf); diff --git a/include/linux/libnvdimm.h 
b/include/linux/libnvdimm.h index c74acfa1a3fe..af38252ad704 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -35,6 +35,11 @@ enum { NDD_WORK_PENDING = 4, /* dimm supports namespace labels */ NDD_LABELING = 6, + /* + * dimm contents have changed requiring invalidation of CPU caches prior + * to activation of a region that includes this device + */ + NDD_INCOHERENT = 7, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, @@ -183,6 +188,8 @@ struct nvdimm_security_ops { int (*overwrite)(struct nvdimm *nvdimm, const struct nvdimm_key_data *key_data); int (*query_overwrite)(struct nvdimm *nvdimm); + int (*disable_master)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data); }; enum nvdimm_fwa_state { diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 1feab6136b5b..5c8865bb59d9 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -69,8 +69,8 @@ #endif #ifndef __ALIGN -#define __ALIGN .align 4,0x90 -#define __ALIGN_STR ".align 4,0x90" +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT +#define __ALIGN_STR __stringify(__ALIGN) #endif #ifdef __ASSEMBLY__ diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 07add7882a5d..c9afcdd9324c 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -199,7 +199,6 @@ struct lru_cache { unsigned long flags; - void *lc_private; const char *name; /* nr_elements there */ @@ -241,7 +240,6 @@ extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); -extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); extern void lc_del(struct lru_cache *lc, struct lc_element *element); extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr); @@ -297,6 +295,5 @@ extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); container_of(ptr, type, member) extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i); -extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e); #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index ec119da1d89b..ed6cb2ac55fa 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -145,6 +145,12 @@ LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name) LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry) LSM_HOOK(int, 0, inode_removexattr, struct user_namespace *mnt_userns, struct dentry *dentry, const char *name) +LSM_HOOK(int, 0, inode_set_acl, struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) +LSM_HOOK(int, 0, inode_get_acl, struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name) +LSM_HOOK(int, 0, inode_remove_acl, struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name) LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry) LSM_HOOK(int, 0, inode_killpriv, struct user_namespace *mnt_userns, struct dentry *dentry) @@ -177,6 +183,7 @@ LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk, struct fown_struct *fown, int sig) LSM_HOOK(int, 0, file_receive, struct file *file) LSM_HOOK(int, 0, file_open, struct file *file) +LSM_HOOK(int, 0, file_truncate, struct file *file) LSM_HOOK(int, 0, task_alloc, struct task_struct *task, unsigned long clone_flags) 
LSM_HOOK(void, LSM_RET_VOID, task_free, struct task_struct *task) @@ -302,7 +309,7 @@ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, - char __user *optval, int __user *optlen, unsigned len) + sockptr_t optval, sockptr_t optlen, unsigned int len) LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority)
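[Editor's note] New hooks such as file_truncate and inode_set_acl are registered like any other LSM hook. A sketch of a minimal module wiring up the added file_truncate hook; the "example" LSM name and the allow-all policy are placeholders, the DEFINE_LSM ordering boilerplate is omitted, and the registration helpers reflect the kernel of this series:

static int example_file_truncate(struct file *file)
{
	/* Placeholder policy: always allow ftruncate() */
	return 0;
}

static struct security_hook_list example_hooks[] __lsm_ro_after_init = {
	LSM_HOOK_INIT(file_truncate, example_file_truncate),
};

static int __init example_lsm_init(void)
{
	security_add_hooks(example_hooks, ARRAY_SIZE(example_hooks),
			   "example");
	return 0;
}

diff --git a/include/linux/lsm_hooks.h index 4ec80b96c22e..0a5ba81f7367 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -92,13 +92,14 @@ * is initialised to NULL by the caller. * @fc indicates the new filesystem context. * @src_fc indicates the original filesystem context. + * Return 0 on success or a negative error code on failure. * @fs_context_parse_param: * Userspace provided a parameter to configure a superblock. The LSM may * reject it with an error and may use it for itself, in which case it * should return 0; otherwise it should return -ENOPARAM to pass it on to * the filesystem. * @fc indicates the filesystem context. - * @param The parameter + * @param The parameter. * * Security hooks for filesystem operations. * @@ -118,6 +119,7 @@ * Free memory associated with @mnt_ops. * @sb_eat_lsm_opts: * Eat (scan @orig options) and save them in @mnt_opts. + * Return 0 on success, negative values on failure. * @sb_statfs: * Check permission before obtaining filesystem statistics for the @mnt * mountpoint. @@ -136,31 +138,24 @@ * @flags contains the mount flags. * @data contains the filesystem-specific data. * Return 0 if permission is granted. - * @sb_copy_data: - * Allow mount option data to be copied prior to parsing by the filesystem, - * so that the security module can extract security-specific mount - * options cleanly (a filesystem may modify the data e.g. with strsep()). - * This also allows the original mount data to be stripped of security- - * specific options to avoid having to make filesystems aware of them. - * @orig the original mount data copied from userspace. - * @copy copied data which will be passed to the security module. - * Returns 0 if the copy was successful. * @sb_mnt_opts_compat: * Determine if the new mount options in @mnt_opts are allowed given * the existing mounted filesystem at @sb. - * @sb superblock being compared - * @mnt_opts new mount options + * @sb superblock being compared. + * @mnt_opts new mount options. * Return 0 if options are compatible. * @sb_remount: * Extracts security system specific mount options and verifies no changes * are being made to those options. - * @sb superblock being remounted + * @sb superblock being remounted. * @data contains the filesystem-specific data. * Return 0 if permission is granted. * @sb_kern_mount: - * Mount this @sb if allowed by permissions. + * Mount this @sb if allowed by permissions. + * Return 0 if permission is granted. * @sb_show_options: * Show (print on @m) mount options for this @sb. + * Return 0 on success, negative values on failure. * @sb_umount: * Check permission before the @mnt file system is unmounted. * @mnt contains the mounted file system. @@ -174,31 +169,31 @@ * Return 0 if permission is granted.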
* @sb_set_mnt_opts: * Set the security relevant mount options used for a superblock - * @sb the superblock to set security mount options for - * @opts binary data structure containing all lsm mount data + * @sb the superblock to set security mount options for. + * @opts binary data structure containing all lsm mount data. + * Return 0 on success, error on failure. * @sb_clone_mnt_opts: * Copy all security options from a given superblock to another - * @oldsb old superblock which contain information to clone - * @newsb new superblock which needs filled in - * @sb_parse_opts_str: - * Parse a string of security data filling in the opts structure - * @options string containing all mount options known by the LSM - * @opts binary data structure usable by the LSM + * @oldsb old superblock which contains information to clone. + * @newsb new superblock which needs to be filled in. + * Return 0 on success, error on failure. * @move_mount: * Check permission before a mount is moved. * @from_path indicates the mount that is going to be moved. * @to_path indicates the mountpoint that will be mounted upon. + * Return 0 if permission is granted. * @dentry_init_security: * Compute a context for a dentry as the inode is not yet available * since NFSv4 has no label backed by an EA anyway. * @dentry dentry to use in calculating the context. * @mode mode used to determine resource type. - * @name name of the last path component used to create file + * @name name of the last path component used to create file. * @xattr_name pointer to place the pointer to security xattr name. * Caller does not have to free the resulting pointer. It's * a pointer to a static string. * @ctx pointer to place the pointer to the resulting context in. * @ctxlen point to place the length of the resulting context. + * Return 0 on success, negative values on failure. * @dentry_create_files_as: * Compute a context for a dentry as the inode is not yet available * and set that context in passed in creds so that new files are @@ -206,9 +201,10 @@ * passed in creds and not the creds of the caller. * @dentry dentry to use in calculating the context. * @mode mode used to determine resource type. - * @name name of the last path component used to create file - * @old creds which should be used for context calculation - * @new creds to modify + * @name name of the last path component used to create file. + * @old creds which should be used for context calculation. + * @new creds to modify. + * Return 0 on success, error on failure. * * * Security hooks for inode operations. @@ -236,7 +232,7 @@ * then it should return -EOPNOTSUPP to skip this processing. * @inode contains the inode structure of the newly created inode. * @dir contains the inode structure of the parent directory. - * @qstr contains the last path component of the new object + * @qstr contains the last path component of the new object. * @name will be set to the allocated name suffix (e.g. selinux). * @value will be set to the allocated attribute value. * @len will be set to the length of the value. @@ -247,9 +243,9 @@ * Set up the incore security field for the new anonymous inode * and return whether the inode creation is permitted by the security * module or not. - * @inode contains the inode structure - * @name name of the anonymous inode class - * @context_inode optional related inode + * @inode contains the inode structure. + * @name name of the anonymous inode class. + * @context_inode optional related inode.
* Returns 0 on success, -EACCES if the security module denies the * creation of this inode, or another -errno upon other errors. * @inode_create: @@ -365,7 +361,7 @@ * mode is specified in @mode. * @path contains the path structure of the file to change the mode. * @mode contains the new DAC's permission, which is a bitmask of - * constants from <include/uapi/linux/stat.h> + * constants from <include/uapi/linux/stat.h>. * Return 0 if permission is granted. * @path_chown: * Check for permission to change owner/group of a file or directory. @@ -380,6 +376,7 @@ * @path_notify: * Check permissions before setting a watch on events as defined by @mask, * on an object at @path, whose type is defined by @obj_type. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -387,7 +384,7 @@ * @inode_follow_link: * Check permission to follow a symbolic link when looking up a pathname. * @dentry contains the dentry structure for the link. - * @inode contains the inode, which itself is not stable in RCU-walk + * @inode contains the inode, which itself is not stable in RCU-walk. * @rcu indicates whether we are in RCU-walk mode. * Return 0 if permission is granted. * @inode_permission: @@ -409,7 +406,9 @@ * @attr is the iattr structure containing the new file attributes. * Return 0 if permission is granted. * @path_truncate: - * Check permission before truncating a file. + * Check permission before truncating the file indicated by path. + * Note that truncation permissions may also be checked based on + * already opened files, using the @file_truncate hook. * @path contains the path structure for the file. * Return 0 if permission is granted. * @inode_getattr: @@ -435,13 +434,25 @@ * Check permission before removing the extended attribute * identified by @name for @dentry. * Return 0 if permission is granted. + * @inode_set_acl: + * Check permission before setting posix acls. + * The posix acls in @kacl are identified by @acl_name. + * Return 0 if permission is granted. + * @inode_get_acl: + * Check permission before getting posix acls. + * The posix acls are identified by @acl_name. + * Return 0 if permission is granted. + * @inode_remove_acl: + * Check permission before removing posix acls. + * The posix acls are identified by @acl_name. + * Return 0 if permission is granted. * @inode_getsecurity: * Retrieve a copy of the extended attribute representation of the * security label associated with @name for @inode via @buffer. Note that * @name is the remainder of the attribute name after the security prefix - * has been removed. @alloc is used to specify of the call should return a - * value via the buffer or just the value length Return size of buffer on - * success. + * has been removed. @alloc is used to specify if the call should return a + * value via the buffer or just the value length. + * Return size of buffer on success. * @inode_setsecurity: * Set the security label associated with @name for @inode from the * extended attribute value @value. @size indicates the size of the @@ -464,7 +475,7 @@ * @inode_killpriv: * The setuid bit is being removed. Remove similar security labels. * Called with the dentry->d_inode->i_mutex held. - * @mnt_userns: user namespace of the mount + * @mnt_userns: user namespace of the mount. * @dentry is the dentry being changed. * Return 0 on success. If error is returned, then the operation * causing setuid bit removal fails.
@@ -491,20 +502,22 @@ * to abort the copy up. Note that the caller is responsible for reading * and writing the xattrs as this hook is merely a filter. * @d_instantiate: - * Fill in @inode security information for a @dentry if allowed. + * Fill in @inode security information for a @dentry if allowed. * @getprocattr: - * Read attribute @name for process @p and store it into @value if allowed. + * Read attribute @name for process @p and store it into @value if allowed. + * Return the length of @value on success, a negative value otherwise. * @setprocattr: - * Write (set) attribute @name to @value, size @size if allowed. + * Write (set) attribute @name to @value, size @size if allowed. + * Return written bytes on success, a negative value otherwise. * * Security hooks for kernfs node operations * * @kernfs_init_security: * Initialize the security context of a newly created kernfs node based * on its own and its parent's attributes. - * - * @kn_dir the parent kernfs node - * @kn the new child kernfs node + * @kn_dir the parent kernfs node. + * @kn the new child kernfs node. + * Return 0 if permission is granted. * * Security hooks for file operations * @@ -543,11 +556,11 @@ * simple integer value. When @arg represents a user space pointer, it * should never be used by the security module. * Return 0 if permission is granted. - * @mmap_addr : + * @mmap_addr: * Check permissions for a mmap operation at @addr. * @addr contains virtual address that will be used for the operation. * Return 0 if permission is granted. - * @mmap_file : + * @mmap_file: * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). @@ -598,10 +611,17 @@ * to receive an open file descriptor via socket IPC. * @file contains the file structure being received. * Return 0 if permission is granted. + * @file_truncate: + * Check permission before truncating a file, i.e. using ftruncate. + * Note that truncation permission may also be checked based on the path, + * using the @path_truncate hook. + * @file contains the file structure for the file. + * Return 0 if permission is granted. * @file_open: * Save open-time permission checking state for later use upon * file_permission, and recheck access if anything has changed * since inode_permission. + * Return 0 if permission is granted. * * Security hooks for task operations. * @@ -619,6 +639,7 @@ * @gfp indicates the atomicity of any memory allocations. * Only allocate sufficient memory and attach to @cred such that * cred_transfer() will not get ENOMEM. + * Return 0 on success, negative values on failure. * @cred_free: * @cred points to the credentials. * Deallocate and clear the cred->security field in a set of credentials. @@ -627,6 +648,7 @@ * @old points to the original credentials. * @gfp indicates the atomicity of any memory allocations. * Prepare a new set of credentials by copying the data from the old set. + * Return 0 on success, negative values on failure. * @cred_transfer: * @new points to the new credentials. * @old points to the original credentials. @@ -638,7 +660,7 @@ * @kernel_act_as: * Set the credentials for a kernel service to act as (subjective context). * @new points to the credentials to be modified. - * @secid specifies the security ID to be set + * @secid specifies the security ID to be set. * The current task must be the one that nominated @secid. * Return 0 if successful. 
* @kernel_create_files_as: @@ -651,19 +673,19 @@ * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. - * @kmod_name name of the module requested by the kernel + * @kmod_name name of the module requested by the kernel. * Return 0 if successful. * @kernel_load_data: * Load data provided by userspace. - * @id kernel load data identifier + * @id kernel load data identifier. * @contents if a subsequent @kernel_post_load_data will be called. * Return 0 if permission is granted. * @kernel_post_load_data: * Load data provided by a non-file source (usually userspace buffer). * @buf pointer to buffer containing the data contents. * @size length of the data contents. - * @id kernel load data identifier - * @description a text description of what was loaded, @id-specific + * @id kernel load data identifier. + * @description a text description of what was loaded, @id-specific. * Return 0 if permission is granted. * This must be paired with a prior @kernel_load_data call that had * @contents set to true. @@ -671,7 +693,7 @@ * Read a file specified by userspace. * @file contains the file structure pointing to the file being read * by the kernel. - * @id kernel read file identifier + * @id kernel read file identifier. * @contents if a subsequent @kernel_post_read_file will be called. * Return 0 if permission is granted. * @kernel_post_read_file: @@ -680,7 +702,7 @@ * by the kernel. * @buf pointer to buffer containing the file contents. * @size length of the file contents. - * @id kernel read file identifier + * @id kernel read file identifier. * This must be paired with a prior @kernel_read_file call that had * @contents set to true. * Return 0 if permission is granted. @@ -690,7 +712,7 @@ * indicates which of the set*uid system calls invoked this hook. If * @new is the set of credentials that will be installed. Modifications * should be made to this rather than to @current->cred. - * @old is the set of credentials that are being replaces + * @old is the set of credentials that are being replaced. * @flags contains one of the LSM_SETID_* values. * Return 0 on success. * @task_fix_setgid: @@ -742,7 +764,7 @@ * @task_setioprio: * Check permission before setting the ioprio value of @p to @ioprio. * @p contains the task_struct of process. - * @ioprio contains the new ioprio value + * @ioprio contains the new ioprio value. * Return 0 if permission is granted. * @task_getioprio: * Check permission before getting the ioprio value of @p. @@ -873,6 +895,7 @@ * @type contains the requested communications type. * @protocol contains the requested protocol. * @kern set to 1 if a kernel socket. + * Return 0 if permission is granted. * @socket_socketpair: * Check permissions before creating a fresh pair of sockets. * @socka contains the first socket structure. @@ -956,14 +979,15 @@ * Must not sleep inside this hook because some callers hold spinlocks. * @sk contains the sock (not socket) associated with the incoming sk_buff. * @skb contains the incoming network data. + * Return 0 if permission is granted. * @socket_getpeersec_stream: * This hook allows the security module to provide peer socket security * state for unix or connected tcp sockets to userspace via getsockopt * SO_GETPEERSEC. For tcp sockets this can be meaningful if the * socket is associated with an ipsec SA. * @sock is the local socket. - * @optval userspace memory where the security state is to be copied. 
- * @optlen userspace int where the module should copy the actual length + * @optval memory where the security state is to be copied. + * @optlen memory where the module should copy the actual length * of the security state. * @len as input is the maximum length to copy to userspace provided * by the caller. @@ -983,6 +1007,7 @@ * @sk_alloc_security: * Allocate and attach a security structure to the sk->sk_security field, * which is used to copy security attributes between local stream sockets. + * Return 0 on success, error on failure. * @sk_free_security: * Deallocate security structure. * @sk_clone_security: @@ -995,17 +1020,19 @@ * @inet_conn_request: * Sets the openreq's sid to socket's sid with MLS portion taken * from peer sid. + * Return 0 if permission is granted. * @inet_csk_clone: * Sets the new child socket's sid to the openreq sid. * @inet_conn_established: * Sets the connection's peersid to the secmark on skb. * @secmark_relabel_packet: - * check if the process should be allowed to relabel packets to - * the given secid + * Check if the process should be allowed to relabel packets to + * the given secid. + * Return 0 if permission is granted. * @secmark_refcount_inc: - * tells the LSM to increment the number of secmark labeling rules loaded + * Tells the LSM to increment the number of secmark labeling rules loaded. * @secmark_refcount_dec: - * tells the LSM to decrement the number of secmark labeling rules loaded + * Tells the LSM to decrement the number of secmark labeling rules loaded. * @req_classify_flow: * Sets the flow's sid to the openreq sid. * @tun_dev_alloc_security: @@ -1016,21 +1043,25 @@ * @tun_dev_free_security: * This hook allows a module to free the security structure for a TUN * device. - * @security pointer to the TUN device's security structure + * @security pointer to the TUN device's security structure. * @tun_dev_create: * Check permissions prior to creating a new TUN device. + * Return 0 if permission is granted. * @tun_dev_attach_queue: * Check permissions prior to attaching to a TUN device queue. * @security pointer to the TUN device's security structure. + * Return 0 if permission is granted. * @tun_dev_attach: * This hook can be used by the module to update any security state * associated with the TUN device's sock structure. * @sk contains the existing sock structure. * @security pointer to the TUN device's security structure. + * Return 0 if permission is granted. * @tun_dev_open: * This hook can be used by the module to update any security state * associated with the TUN device's security structure. * @security pointer to the TUN devices's security structure. + * Return 0 if permission is granted. * * Security hooks for SCTP * @@ -1063,6 +1094,7 @@ * to the security module. * @asoc pointer to sctp association structure. * @skb pointer to skbuff of association packet. + * Return 0 if permission is granted. * * Security hooks for Infiniband * @@ -1071,15 +1103,17 @@ * @subnet_prefix the subnet prefix of the port being used. * @pkey the pkey to be accessed. * @sec pointer to a security structure. + * Return 0 if permission is granted. * @ib_endport_manage_subnet: * Check permissions to send and receive SMPs on a end port. * @dev_name the IB device name (i.e. mlx4_0). * @port_num the port number. * @sec pointer to a security structure. + * Return 0 if permission is granted. * @ib_alloc_security: * Allocate a security structure for Infiniband objects. * @sec pointer to a security structure pointer. 
- * Returns 0 on success, non-zero on failure + * Returns 0 on success, non-zero on failure. * @ib_free_security: * Deallocate an Infiniband security structure. * @sec contains the security structure to be freed. @@ -1091,10 +1125,11 @@ * Database used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). + * @gfp is to specify the context for the allocation. * Allocate a security structure to the xp->security field; the security * field is initialized to NULL when the xfrm_policy is allocated. - * Return 0 if operation was successful (memory to allocate, legal context) - * @gfp is to specify the context for the allocation + * Return 0 if operation was successful (memory to allocate, legal + * context). * @xfrm_policy_clone_security: * @old_ctx contains an existing xfrm_sec_ctx. * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. @@ -1102,11 +1137,12 @@ * information from the old_ctx structure. * Return 0 if operation was successful (memory to allocate). * @xfrm_policy_free_security: - * @ctx contains the xfrm_sec_ctx + * @ctx contains the xfrm_sec_ctx. * Deallocate xp->security. * @xfrm_policy_delete_security: * @ctx contains the xfrm_sec_ctx. * Authorize deletion of xp->security. + * Return 0 if permission is granted. * @xfrm_state_alloc: * @x contains the xfrm_state being added to the Security Association * Database by the XFRM system. @@ -1132,6 +1168,7 @@ * @xfrm_state_delete_security: * @x contains the xfrm_state. * Authorize deletion of x->security. + * Return 0 if permission is granted. * @xfrm_policy_lookup: * @ctx contains the xfrm_sec_ctx for which the access control is being * checked. @@ -1160,7 +1197,7 @@ * Permit allocation of a key and assign security data. Note that key does * not have a serial number assigned at this point. * @key points to the key. - * @flags is the allocation flags + * @flags is the allocation flags. * Return 0 if permission is granted, -ve error otherwise. * @key_free: * Notification of destruction; free security data. @@ -1190,8 +1227,8 @@ * * @ipc_permission: * Check permissions for access to IPC - * @ipcp contains the kernel IPC permission structure - * @flag contains the desired (requested) permission set + * @ipcp contains the kernel IPC permission structure. + * @flag contains the desired (requested) permission set. * Return 0 if permission is granted. * @ipc_getsecid: * Get the secid associated with the ipc object. @@ -1336,15 +1373,18 @@ * to @to. * @from contains the struct cred for the sending process. * @to contains the struct cred for the receiving process. + * Return 0 if permission is granted. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. * @from contains the struct cred for the sending process. * @to contains the struct cred for the receiving process. + * Return 0 if permission is granted. * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. * @to contains the struct cred for the receiving process. + * Return 0 if permission is granted. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1386,32 +1426,39 @@ * Check whether the @tsk process has the @cap capability in the indicated * credentials. * @cred contains the credentials to use. 
- * @ns contains the user namespace we want the capability in + * @ns contains the user namespace we want the capability in. * @cap contains the capability <include/linux/capability.h>. - * @opts contains options for the capable check <include/linux/security.h> + * @opts contains options for the capable check <include/linux/security.h>. * Return 0 if the capability is granted for @tsk. * @quotactl: - * Check whether the quotactl syscall is allowed for this @sb. + * Check whether the quotactl syscall is allowed for this @sb. + * Return 0 if permission is granted. * @quota_on: - * Check whether QUOTAON is allowed for this @dentry. + * Check whether QUOTAON is allowed for this @dentry. + * Return 0 if permission is granted. * @syslog: * Check permission before accessing the kernel message ring or changing * logging to the console. * See the syslog(2) manual page for an explanation of the @type values. - * @type contains the SYSLOG_ACTION_* constant from <include/linux/syslog.h> + * @type contains the SYSLOG_ACTION_* constant from + * <include/linux/syslog.h>. * Return 0 if permission is granted. * @settime: * Check permission to change the system time. * struct timespec64 is defined in <include/linux/time64.h> and timezone * is defined in <include/linux/time.h> - * @ts contains new time - * @tz contains new timezone + * @ts contains new time. + * @tz contains new timezone. * Return 0 if permission is granted. * @vm_enough_memory: * Check permissions for allocating a new virtual mapping. * @mm contains the mm struct it is being added to. * @pages contains the number of pages. - * Return 0 if permission is granted. + * Return 0 if permission is granted by the LSM infrastructure to the + * caller. If all LSMs return a positive value, __vm_enough_memory() will + * be called with cap_sys_admin set. If at least one LSM returns 0 or + * negative, __vm_enough_memory() will be called with cap_sys_admin + * cleared. * * @ismaclabel: * Check if the extended attribute specified by @name @@ -1429,11 +1476,13 @@ * @secid contains the security ID. * @secdata contains the pointer that stores the converted security * context. - * @seclen pointer which contains the length of the data + * @seclen pointer which contains the length of the data. + * Return 0 on success, error on failure. * @secctx_to_secid: * Convert security context to secid. * @secid contains the pointer to the generated security ID. * @secdata contains the security context. + * Return 0 on success, error on failure. * * @release_secctx: * Release the security context. @@ -1470,7 +1519,7 @@ * @audit_rule_free: * Deallocate the LSM audit rule structure previously allocated by * audit_rule_init. - * @lsmrule contains the allocated rule + * @lsmrule contains the allocated rule. * * @inode_invalidate_secctx: * Notify the security module that it must revalidate the security context @@ -1487,6 +1536,7 @@ * @inode we wish to set the security context of. * @ctx contains the string which we wish to set in the inode. * @ctxlen contains the length of @ctx. + * Return 0 on success, error on failure. * * @inode_setsecctx: * Change the security context of an inode. Updates the @@ -1500,6 +1550,7 @@ * @dentry contains the inode we wish to set the security context of. * @ctx contains the string which we wish to set in the inode. * @ctxlen contains the length of @ctx. + * Return 0 on success, error on failure. 
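The expanded @vm_enough_memory text above describes a tri-state contract that differs from every other hook in this file: a positive return is "no objection", while 0 or negative forces the stricter overcommit check. A sketch in the spirit of the commoncap implementation (which uses the no-audit capability check rather than plain capable()):

#include <linux/capability.h>
#include <linux/mm_types.h>

static int example_vm_enough_memory(struct mm_struct *mm, long pages)
{
	/*
	 * Positive: no objection; if every LSM agrees, __vm_enough_memory()
	 * runs with cap_sys_admin set.  Zero or negative: the check runs
	 * with cap_sys_admin cleared.
	 */
	return capable(CAP_SYS_ADMIN) ? 1 : 0;
}
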
* * @inode_getsecctx: * On success, returns 0 and fills out @ctx and @ctxlen with the security @@ -1507,6 +1558,7 @@ * @inode we wish to get the security context of. * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. + * Return 0 on success, error on failure. * * Security hooks for the general notification queue: * @@ -1514,13 +1566,15 @@ * Check to see if a watch notification can be posted to a particular * queue. * @w_cred: The credentials of the whoever set the watch. - * @cred: The event-triggerer's credentials - * @n: The notification being posted + * @cred: The event-triggerer's credentials. + * @n: The notification being posted. + * Return 0 if permission is granted. * * @watch_key: * Check to see if a process is allowed to watch for event notifications * from a key or keyring. * @key: The key to watch. + * Return 0 if permission is granted. * * Security hooks for using the eBPF maps and programs functionalities through * eBPF syscalls. @@ -1529,65 +1583,74 @@ * Do a initial check for all bpf syscalls after the attribute is copied * into the kernel. The actual security module can implement their own * rules to check the specific cmd they need. + * Return 0 if permission is granted. * * @bpf_map: * Do a check when the kernel generate and return a file descriptor for * eBPF maps. - * - * @map: bpf map that we want to access - * @mask: the access flags + * @map: bpf map that we want to access. + * @mask: the access flags. + * Return 0 if permission is granted. * * @bpf_prog: * Do a check when the kernel generate and return a file descriptor for * eBPF programs. - * * @prog: bpf prog that userspace want to use. + * Return 0 if permission is granted. * * @bpf_map_alloc_security: * Initialize the security field inside bpf map. + * Return 0 on success, error on failure. * * @bpf_map_free_security: * Clean up the security information stored inside bpf map. * * @bpf_prog_alloc_security: * Initialize the security field inside bpf program. + * Return 0 on success, error on failure. * * @bpf_prog_free_security: * Clean up the security information stored inside bpf prog. * * @locked_down: - * Determine whether a kernel feature that potentially enables arbitrary - * code execution in kernel space should be permitted. - * - * @what: kernel feature being accessed + * Determine whether a kernel feature that potentially enables arbitrary + * code execution in kernel space should be permitted. + * @what: kernel feature being accessed. + * Return 0 if permission is granted. * * Security hooks for perf events * * @perf_event_open: - * Check whether the @type of perf_event_open syscall is allowed. + * Check whether the @type of perf_event_open syscall is allowed. + * Return 0 if permission is granted. * @perf_event_alloc: - * Allocate and save perf_event security info. + * Allocate and save perf_event security info. + * Return 0 on success, error on failure. * @perf_event_free: - * Release (free) perf_event security info. + * Release (free) perf_event security info. * @perf_event_read: - * Read perf_event security info if allowed. + * Read perf_event security info if allowed. + * Return 0 if permission is granted. * @perf_event_write: - * Write perf_event security info if allowed. + * Write perf_event security info if allowed. + * Return 0 if permission is granted. 
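Several of the eBPF and perf entries above gain explicit "Return 0 if permission is granted" lines. As one hedged illustration, a @bpf hook enforcing a hypothetical CAP_BPF-only program-load policy could look like the sketch below; the policy is illustrative, not anything this patch mandates.

#include <linux/bpf.h>
#include <linux/capability.h>
#include <linux/errno.h>

static int example_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	/* Hypothetical policy: only CAP_BPF holders may load programs. */
	if (cmd == BPF_PROG_LOAD && !capable(CAP_BPF))
		return -EPERM;

	return 0;	/* permission granted */
}
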
* * Security hooks for io_uring * * @uring_override_creds: - * Check if the current task, executing an io_uring operation, is allowed - * to override it's credentials with @new. - * - * @new: the new creds to use + * Check if the current task, executing an io_uring operation, is allowed + * to override it's credentials with @new. + * @new: the new creds to use. + * Return 0 if permission is granted. * * @uring_sqpoll: - * Check whether the current task is allowed to spawn a io_uring polling - * thread (IORING_SETUP_SQPOLL). + * Check whether the current task is allowed to spawn a io_uring polling + * thread (IORING_SETUP_SQPOLL). + * Return 0 if permission is granted. * * @uring_cmd: - * Check whether the file_operations uring_cmd is allowed to run. + * Check whether the file_operations uring_cmd is allowed to run. + * Return 0 if permission is granted. * */ union security_list_options { diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 2effab72add1..e594db58a0f1 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -638,6 +638,12 @@ static inline void mt_set_in_rcu(struct maple_tree *mt) } } +static inline unsigned int mt_height(const struct maple_tree *mt) + +{ + return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; +} + void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max); void *mt_find_after(struct maple_tree *mt, unsigned long *index, unsigned long max); @@ -664,6 +670,7 @@ extern atomic_t maple_tree_tests_passed; void mt_dump(const struct maple_tree *mt); void mt_validate(struct maple_tree *mt); +void mt_cache_shrink(void); #define MT_BUG_ON(__tree, __x) do { \ atomic_inc(&maple_tree_tests_run); \ if (__x) { \ diff --git a/include/linux/math64.h b/include/linux/math64.h index a14f40de1dca..8958f4c005c1 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -29,7 +29,7 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) return dividend / divisor; } -/* +/** * div_s64_rem - signed 64bit divide with 32bit divisor with remainder * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor @@ -43,7 +43,7 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) return dividend / divisor; } -/* +/** * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor @@ -57,7 +57,7 @@ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) return dividend / divisor; } -/* +/** * div64_u64 - unsigned 64bit divide with 64bit divisor * @dividend: unsigned 64bit dividend * @divisor: unsigned 64bit divisor @@ -69,7 +69,7 @@ static inline u64 div64_u64(u64 dividend, u64 divisor) return dividend / divisor; } -/* +/** * div64_s64 - signed 64bit divide with 64bit divisor * @dividend: signed 64bit dividend * @divisor: signed 64bit divisor @@ -120,6 +120,8 @@ extern s64 div64_s64(s64 dividend, s64 divisor); * This is the most common 64bit divide and should be used if possible, * as many 32bit archs can optimize this variant better than a full 64bit * divide. 
+ * + * Return: dividend / divisor */ #ifndef div_u64 static inline u64 div_u64(u64 dividend, u32 divisor) @@ -133,6 +135,8 @@ static inline u64 div_u64(u64 dividend, u32 divisor) * div_s64 - signed 64bit divide with 32bit divisor * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor + * + * Return: dividend / divisor */ #ifndef div_s64 static inline s64 div_s64(s64 dividend, s32 divisor) @@ -284,6 +288,16 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); +/** + * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up + * @ll: unsigned 64bit dividend + * @d: unsigned 64bit divisor + * + * Divide unsigned 64bit dividend by unsigned 64bit divisor + * and round up. + * + * Return: dividend / divisor rounded up + */ #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) @@ -300,7 +314,7 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV64_U64_ROUND_CLOSEST(dividend, divisor) \ ({ u64 _tmp = (divisor); div64_u64((dividend) + _tmp / 2, _tmp); }) -/* +/** * DIV_U64_ROUND_CLOSEST - unsigned 64bit divide with 32bit divisor rounded to nearest integer * @dividend: unsigned 64bit dividend * @divisor: unsigned 32bit divisor @@ -313,7 +327,7 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV_U64_ROUND_CLOSEST(dividend, divisor) \ ({ u32 _tmp = (divisor); div_u64((u64)(dividend) + _tmp / 2, _tmp); }) -/* +/** * DIV_S64_ROUND_CLOSEST - signed 64bit divide with 32bit divisor rounded to nearest integer * @dividend: signed 64bit dividend * @divisor: signed 32bit divisor diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 2da63fd7b98f..97e64184767d 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -10,6 +10,12 @@ struct mb_cache; +/* Cache entry flags */ +enum { + MBE_REFERENCED_B = 0, + MBE_REUSABLE_B +}; + struct mb_cache_entry { /* List of entries in cache - protected by cache->c_list_lock */ struct list_head e_list; @@ -26,8 +32,7 @@ struct mb_cache_entry { atomic_t e_refcnt; /* Key in hash - stable during lifetime of the entry */ u32 e_key; - u32 e_referenced:1; - u32 e_reusable:1; + unsigned long e_flags; /* User provided value - stable during lifetime of the entry */ u64 e_value; }; diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 00177567cfef..f7fbbf3069e7 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -488,6 +488,19 @@ static inline int mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad, return mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum), val); } +static inline int mdiodev_c45_read(struct mdio_device *mdiodev, int devad, + u16 regnum) +{ + return mdiobus_c45_read(mdiodev->bus, mdiodev->addr, devad, regnum); +} + +static inline int mdiodev_c45_write(struct mdio_device *mdiodev, u32 devad, + u16 regnum, u16 val) +{ + return mdiobus_c45_write(mdiodev->bus, mdiodev->addr, devad, regnum, + val); +} + int mdiobus_register_device(struct mdio_device *mdiodev); int mdiobus_unregister_device(struct mdio_device *mdiodev); bool mdiobus_is_registered_device(struct mii_bus *bus, int addr); diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index df1fab44ea5c..fd6e0620658d 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -11,6 +11,7 @@ struct mei_cl_device; struct mei_device; +struct scatterlist; typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev); @@ -116,6 +117,11 @@ void mei_cldev_set_drvdata(struct 
mei_cl_device *cldev, void *data); int mei_cldev_enable(struct mei_cl_device *cldev); int mei_cldev_disable(struct mei_cl_device *cldev); bool mei_cldev_enabled(const struct mei_cl_device *cldev); +ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev, + u8 client_id, u32 fence_id, + struct scatterlist *sg_in, + size_t total_in_len, + struct scatterlist *sg_out); void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size); int mei_cldev_dma_unmap(struct mei_cl_device *cldev); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e1644a24009c..d3c8203cab6c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -615,28 +615,32 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, void mem_cgroup_calculate_protection(struct mem_cgroup *root, struct mem_cgroup *memcg); -static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) { /* * The root memcg doesn't account charges, and doesn't support - * protection. + * protection. The target memcg's protection is ignored, see + * mem_cgroup_calculate_protection() and mem_cgroup_protection() */ - return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg); - + return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) || + memcg == target; } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.elow) >= page_counter_read(&memcg->memory); } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.emin) >= @@ -1209,12 +1213,19 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, { } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) +{ + return true; +} +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 965009aa01d7..fc9647b1b4f9 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -18,7 +18,6 @@ * the same memory tier. 
*/ #define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) -#define MEMTIER_HOTPLUG_PRIO 100 struct memory_tier; struct memory_dev_type { diff --git a/include/linux/memory.h b/include/linux/memory.h index aa619464a1df..31343566c221 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -19,7 +19,6 @@ #include <linux/node.h> #include <linux/compiler.h> #include <linux/mutex.h> -#include <linux/notifier.h> #define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) @@ -85,6 +84,9 @@ struct memory_block { unsigned long nr_vmemmap_pages; struct memory_group *group; /* group (if any) for this block */ struct list_head group_next; /* next block inside memory group */ +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) + atomic_long_t nr_hwpoison; +#endif }; int arch_get_memory_phys_device(unsigned long start_pfn); @@ -113,8 +115,13 @@ struct mem_section; * Priorities for the hotplug memory callback routines (stored in decreasing * order in the callback chain) */ -#define SLAB_CALLBACK_PRI 1 -#define IPC_CALLBACK_PRI 10 +#define DEFAULT_CALLBACK_PRI 0 +#define SLAB_CALLBACK_PRI 1 +#define HMAT_CALLBACK_PRI 2 +#define MM_COMPUTE_BATCH_PRI 10 +#define CPUSET_CALLBACK_PRI 10 +#define MEMTIER_HOTPLUG_PRI 100 +#define KSM_CALLBACK_PRI 100 #ifndef CONFIG_MEMORY_HOTPLUG static inline void memory_dev_init(void) @@ -136,9 +143,6 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) { return 0; } -/* These aren't inline functions due to a GCC bug. */ -#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; }) -#define unregister_hotmemory_notifier(nb) ({ (void)(nb); }) #else /* CONFIG_MEMORY_HOTPLUG */ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); @@ -166,8 +170,6 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, { .notifier_call = fn, .priority = pri };\ register_memory_notifier(&fn##_mem_nb); \ }) -#define register_hotmemory_notifier(nb) register_memory_notifier(nb) -#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb) #ifdef CONFIG_NUMA void memory_block_add_nid(struct memory_block *mem, int nid, diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 0c964ac107c2..4aae6c06c5f2 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -30,6 +30,11 @@ static inline bool mempool_initialized(mempool_t *pool) return pool->elements != NULL; } +static inline bool mempool_is_saturated(mempool_t *pool) +{ + return READ_ONCE(pool->curr_nr) >= pool->min_nr; +} + void mempool_exit(mempool_t *pool); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, diff --git a/include/linux/memregion.h b/include/linux/memregion.h index c04c4fd2e209..bf83363807ac 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,6 +3,7 @@ #define _MEMREGION_H_ #include <linux/types.h> #include <linux/errno.h> +#include <linux/bug.h> struct memregion_info { int target_node; @@ -20,4 +21,41 @@ static inline void memregion_free(int id) { } #endif + +/** + * cpu_cache_invalidate_memregion - drop any CPU cached data for + * memregions described by @res_desc + * @res_desc: one of the IORES_DESC_* types + * + * Perform cache maintenance after a memory event / operation that + * changes the contents of physical memory in a cache-incoherent manner. 
+ * For example, device memory technologies like NVDIMM and CXL have + * device secure erase, and dynamic region provision that can replace + * the memory mapped to a given physical address. + * + * Limit the functionality to architectures that have an efficient way + * to writeback and invalidate potentially terabytes of address space at + * once. Note that this routine may or may not write back any dirty + * contents while performing the invalidation. It is only exported for + * the explicit usage of the NVDIMM and CXL modules in the 'DEVMEM' + * symbol namespace on bare platforms. + * + * Returns 0 on success or negative error code on a failure to perform + * the cache maintenance. + */ +#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION +int cpu_cache_invalidate_memregion(int res_desc); +bool cpu_cache_has_invalidate_memregion(void); +#else +static inline bool cpu_cache_has_invalidate_memregion(void) +{ + return false; +} + +static inline int cpu_cache_invalidate_memregion(int res_desc) +{ + WARN_ON_ONCE("CPU cache invalidation required"); + return -ENXIO; +} +#endif #endif /* _MEMREGION_H_ */ diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index 6c98edcf4b0b..6193905abbb5 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -110,8 +110,6 @@ enum max8997_haptic_pwm_divisor { /** * max8997_haptic_platform_data - * @pwm_channel_id: channel number of PWM device - * valid for MAX8997_EXTERNAL_MODE * @pwm_period: period in nano second for PWM device * valid for MAX8997_EXTERNAL_MODE * @type: motor type @@ -128,7 +126,6 @@ enum max8997_haptic_pwm_divisor { * [0 - 255]: available period */ struct max8997_haptic_platform_data { - unsigned int pwm_channel_id; unsigned int pwm_period; enum max8997_haptic_motor_type type; diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 27264fe4b3b9..e8bf90281ba0 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -102,7 +102,6 @@ struct tmio_mmc_data { unsigned long capabilities2; unsigned long flags; u32 ocr_mask; /* available voltages */ - int alignment_shift; dma_addr_t dma_rx_offset; unsigned int max_blk_count; unsigned short max_segs; diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 5433c08fcc68..396df1121bff 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -37,6 +37,28 @@ __cmp(x, y, op), \ __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) +#define __clamp(val, lo, hi) \ + ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) + +#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ + typeof(val) unique_val = (val); \ + typeof(lo) unique_lo = (lo); \ + typeof(hi) unique_hi = (hi); \ + __clamp(unique_val, unique_lo, unique_hi); }) + +#define __clamp_input_check(lo, hi) \ + (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ + __is_constexpr((lo) > (hi)), (lo) > (hi), false))) + +#define __careful_clamp(val, lo, hi) ({ \ + __clamp_input_check(lo, hi) + \ + __builtin_choose_expr(__typecheck(val, lo) && __typecheck(val, hi) && \ + __typecheck(hi, lo) && __is_constexpr(val) && \ + __is_constexpr(lo) && __is_constexpr(hi), \ + __clamp(val, lo, hi), \ + __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ + __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) + /** * min - return minimum of two values of the same or compatible types * @x: first value @@ -86,7 +108,7 @@ * This macro does strict typechecking of @lo/@hi to make sure they are of the * same type as @val. See the unnecessary pointer comparisons. 
*/ -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) +#define clamp(val, lo, hi) __careful_clamp(val, lo, hi) /* * ..and if you can't take the strict @@ -121,7 +143,7 @@ * This macro does no typechecking and uses temporary variables of type * @type to make all the comparisons. */ -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) +#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi)) /** * clamp_val - return a value clamped to a given range using val's type diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1ff91cb79ded..5fe5d198b57a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -290,10 +290,9 @@ enum { MLX5_UMR_INLINE = (1 << 7), }; -#define MLX5_UMR_KLM_ALIGNMENT 4 -#define MLX5_UMR_MTT_ALIGNMENT 0x40 -#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1) -#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT +#define MLX5_UMR_FLEX_ALIGNMENT 0x40 +#define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) +#define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) @@ -882,6 +881,12 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe) return cqe->op_own >> 4; } +static inline u8 get_cqe_enhanced_num_mini_cqes(struct mlx5_cqe64 *cqe) +{ + /* num_of_mini_cqes is zero based */ + return get_cqe_opcode(cqe) + 1; +} + static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro.tcppsh_abort_dupack >> 6) & 1; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af2ceb4160bc..d476255c9a3f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -606,8 +606,6 @@ struct mlx5_priv { struct list_head pgdir_list; /* end: alloc staff */ - struct list_head ctx_list; - spinlock_t ctx_lock; struct mlx5_adev **adev; int adev_idx; int sw_vhca_id; @@ -981,6 +979,7 @@ struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ + u16 op_mod; /* cmd op_mod */ void *out; /* pointer to the cmd output buffer */ }; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index c7a91981cd5a..ba6958b49a8e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -50,6 +50,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_PORT, MLX5_FLOW_DESTINATION_TYPE_COUNTER, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, + MLX5_FLOW_DESTINATION_TYPE_RANGE, }; enum { @@ -143,6 +144,10 @@ enum { MLX5_FLOW_DEST_VPORT_REFORMAT_ID = BIT(1), }; +enum mlx5_flow_dest_range_field { + MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN = 0, +}; + struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { @@ -156,6 +161,13 @@ struct mlx5_flow_destination { struct mlx5_pkt_reformat *pkt_reformat; u8 flags; } vport; + struct { + struct mlx5_flow_table *hit_ft; + struct mlx5_flow_table *miss_ft; + enum mlx5_flow_dest_range_field field; + u32 min; + u32 max; + } range; u32 sampler_id; }; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5a4e914e2a6f..152d2d7f8743 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -68,6 +68,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25, }; @@ -445,7 +446,10 
@@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 max_modify_header_actions[0x8]; u8 max_ft_level[0x8]; - u8 reserved_at_40[0x6]; + u8 reformat_add_esp_trasport[0x1]; + u8 reserved_at_41[0x2]; + u8 reformat_del_esp_trasport[0x1]; + u8 reserved_at_44[0x2]; u8 execute_aso[0x1]; u8 reserved_at_47[0x19]; @@ -638,8 +642,10 @@ struct mlx5_ifc_fte_match_set_misc2_bits { u8 reserved_at_1a0[0x8]; u8 macsec_syndrome[0x8]; + u8 ipsec_syndrome[0x8]; + u8 reserved_at_1b8[0x8]; - u8 reserved_at_1b0[0x50]; + u8 reserved_at_1c0[0x40]; }; struct mlx5_ifc_fte_match_set_misc3_bits { @@ -1875,7 +1881,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { }; struct mlx5_ifc_cmd_hca_cap_2_bits { - u8 reserved_at_0[0xa0]; + u8 reserved_at_0[0x80]; + + u8 migratable[0x1]; + u8 reserved_at_81[0x1f]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -6104,6 +6113,38 @@ struct mlx5_ifc_match_definer_format_32_bits { u8 inner_dmac_15_0[0x10]; }; +enum { + MLX5_IFC_DEFINER_FORMAT_ID_SELECT = 61, +}; + +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED 0x0 +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN 0x48 +#define MLX5_IFC_DEFINER_DW_SELECTORS_NUM 9 +#define MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM 8 + +struct mlx5_ifc_match_definer_match_mask_bits { + u8 reserved_at_1c0[5][0x20]; + u8 match_dw_8[0x20]; + u8 match_dw_7[0x20]; + u8 match_dw_6[0x20]; + u8 match_dw_5[0x20]; + u8 match_dw_4[0x20]; + u8 match_dw_3[0x20]; + u8 match_dw_2[0x20]; + u8 match_dw_1[0x20]; + u8 match_dw_0[0x20]; + + u8 match_byte_7[0x8]; + u8 match_byte_6[0x8]; + u8 match_byte_5[0x8]; + u8 match_byte_4[0x8]; + + u8 match_byte_3[0x8]; + u8 match_byte_2[0x8]; + u8 match_byte_1[0x8]; + u8 match_byte_0[0x8]; +}; + struct mlx5_ifc_match_definer_bits { u8 modify_field_select[0x40]; @@ -6112,9 +6153,41 @@ struct mlx5_ifc_match_definer_bits { u8 reserved_at_80[0x10]; u8 format_id[0x10]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x60]; + + u8 format_select_dw3[0x8]; + u8 format_select_dw2[0x8]; + u8 format_select_dw1[0x8]; + u8 format_select_dw0[0x8]; + + u8 format_select_dw7[0x8]; + u8 format_select_dw6[0x8]; + u8 format_select_dw5[0x8]; + u8 format_select_dw4[0x8]; + + u8 reserved_at_100[0x18]; + u8 format_select_dw8[0x8]; + + u8 reserved_at_120[0x20]; + + u8 format_select_byte3[0x8]; + u8 format_select_byte2[0x8]; + u8 format_select_byte1[0x8]; + u8 format_select_byte0[0x8]; + + u8 format_select_byte7[0x8]; + u8 format_select_byte6[0x8]; + u8 format_select_byte5[0x8]; + u8 format_select_byte4[0x8]; + + u8 reserved_at_180[0x40]; - u8 match_mask[16][0x20]; + union { + struct { + u8 match_mask[16][0x20]; + }; + struct mlx5_ifc_match_definer_match_mask_bits match_mask_format; + }; }; struct mlx5_ifc_general_obj_in_cmd_hdr_bits { @@ -6384,6 +6457,9 @@ enum mlx5_reformat_ctx_type { MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2, MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3, MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4 = 0x5, + MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT = 0x8, + MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV6 = 0xb, MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf, MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10, MLX5_REFORMAT_TYPE_ADD_MACSEC = 0x11, @@ -11563,6 +11639,41 @@ enum { MLX5_IPSEC_OBJECT_ICV_LEN_16B, }; +enum { + MLX5_IPSEC_ASO_REG_C_0_1 = 0x0, + MLX5_IPSEC_ASO_REG_C_2_3 = 0x1, + MLX5_IPSEC_ASO_REG_C_4_5 = 0x2, + MLX5_IPSEC_ASO_REG_C_6_7 = 0x3, +}; + +enum { + MLX5_IPSEC_ASO_MODE = 0x0, + MLX5_IPSEC_ASO_REPLAY_PROTECTION = 0x1, + MLX5_IPSEC_ASO_INC_SN = 0x2, +}; + +struct mlx5_ifc_ipsec_aso_bits { + u8 
valid[0x1]; + u8 reserved_at_201[0x1]; + u8 mode[0x2]; + u8 window_sz[0x2]; + u8 soft_lft_arm[0x1]; + u8 hard_lft_arm[0x1]; + u8 remove_flow_enable[0x1]; + u8 esn_event_arm[0x1]; + u8 reserved_at_20a[0x16]; + + u8 remove_flow_pkt_cnt[0x20]; + + u8 remove_flow_soft_lft[0x20]; + + u8 reserved_at_260[0x80]; + + u8 mode_parameter[0x20]; + + u8 replay_protection_window[0x100]; +}; + struct mlx5_ifc_ipsec_obj_bits { u8 modify_field_select[0x40]; u8 full_offload[0x1]; @@ -11584,7 +11695,11 @@ struct mlx5_ifc_ipsec_obj_bits { u8 implicit_iv[0x40]; - u8 reserved_at_100[0x700]; + u8 reserved_at_100[0x8]; + u8 ipsec_aso_access_pd[0x18]; + u8 reserved_at_120[0xe0]; + + struct mlx5_ifc_ipsec_aso_bits ipsec_aso; }; struct mlx5_ifc_create_ipsec_obj_in_bits { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index aad53cb72f17..7f31432f44c2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -132,4 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, + u16 opmod); #endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c84f4e48cd7..f3f196e4d66d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -74,6 +74,7 @@ static inline void totalram_pages_add(long count) extern void * high_memory; extern int page_cluster; +extern const int page_cluster_max; #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; @@ -549,7 +550,7 @@ struct vm_operations_struct { /* * Called by mprotect() to make driver-specific permission * checks before mprotect() is finalised. The VMA must not - * be modified. Returns 0 if eprotect() can proceed. + * be modified. Returns 0 if mprotect() can proceed. */ int (*mprotect)(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long newflags); @@ -699,8 +700,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) * paths in userfault. */ bool vma_is_shmem(struct vm_area_struct *vma); +bool vma_is_anon_shmem(struct vm_area_struct *vma); #else static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } +static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; } #endif int vma_is_stack_for_current(struct vm_area_struct *vma); @@ -817,8 +820,8 @@ static inline int is_vmalloc_or_module_addr(const void *x) /* * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for - * debugging purposes; look at folio_mapcount() or page_mapcount() - * instead. + * debugging purposes - it does not include PTE-mapped sub-pages; look + * at folio_mapcount() or page_mapcount() or total_mapcount() instead. */ static inline int folio_entire_mapcount(struct folio *folio) { @@ -828,12 +831,29 @@ static inline int folio_entire_mapcount(struct folio *folio) /* * Mapcount of compound page as a whole, does not include mapped sub-pages. - * - * Must be called only for compound pages. + * Must be called only on head of compound page. 
*/ -static inline int compound_mapcount(struct page *page) +static inline int head_compound_mapcount(struct page *head) { - return folio_entire_mapcount(page_folio(page)); + return atomic_read(compound_mapcount_ptr(head)) + 1; +} + +/* + * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages, + * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit + * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently + * leaves subpages_mapcount at 0, but avoid surprise if it participates later. + */ +#define COMPOUND_MAPPED 0x800000 +#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1) + +/* + * Number of sub-pages mapped by PTE, does not include compound mapcount. + * Must be called only on head of compound page. + */ +static inline int head_subpages_mapcount(struct page *head) +{ + return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED; } /* @@ -846,11 +866,9 @@ static inline void page_mapcount_reset(struct page *page) atomic_set(&(page)->_mapcount, -1); } -int __page_mapcount(struct page *page); - /* * Mapcount of 0-order page; when compound sub-page, includes - * compound_mapcount(). + * compound_mapcount of compound_head of page. * * Result is undefined for pages which cannot be mapped into userspace. * For example SLAB or special types of pages. See function page_has_type(). @@ -858,25 +876,75 @@ int __page_mapcount(struct page *page); */ static inline int page_mapcount(struct page *page) { - if (unlikely(PageCompound(page))) - return __page_mapcount(page); - return atomic_read(&page->_mapcount) + 1; + int mapcount = atomic_read(&page->_mapcount) + 1; + + if (likely(!PageCompound(page))) + return mapcount; + page = compound_head(page); + return head_compound_mapcount(page) + mapcount; } -int folio_mapcount(struct folio *folio); +int total_compound_mapcount(struct page *head); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int total_mapcount(struct page *page) +/** + * folio_mapcount() - Calculate the number of mappings of this folio. + * @folio: The folio. + * + * A large folio tracks both how many times the entire folio is mapped, + * and how many times each individual page in the folio is mapped. + * This function calculates the total number of times the folio is + * mapped. + * + * Return: The number of times this folio is mapped. + */ +static inline int folio_mapcount(struct folio *folio) { - return folio_mapcount(page_folio(page)); + if (likely(!folio_test_large(folio))) + return atomic_read(&folio->_mapcount) + 1; + return total_compound_mapcount(&folio->page); } -#else static inline int total_mapcount(struct page *page) { - return page_mapcount(page); + if (likely(!PageCompound(page))) + return atomic_read(&page->_mapcount) + 1; + return total_compound_mapcount(compound_head(page)); +} + +static inline bool folio_large_is_mapped(struct folio *folio) +{ + /* + * Reading folio_mapcount_ptr() below could be omitted if hugetlb + * participated in incrementing subpages_mapcount when compound mapped. + */ + return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 || + atomic_read(folio_mapcount_ptr(folio)) >= 0; +} + +/** + * folio_mapped - Is this folio mapped into userspace? + * @folio: The folio. + * + * Return: True if any page in this folio is referenced by user page tables. + */ +static inline bool folio_mapped(struct folio *folio) +{ + if (likely(!folio_test_large(folio))) + return atomic_read(&folio->_mapcount) >= 0; + return folio_large_is_mapped(folio); +} + +/* + * Return true if this page is mapped into pagetables. 
+ * For compound page it returns true if any sub-page of compound page is mapped, + * even if this particular sub-page is not itself mapped by any PTE or PMD. + */ +static inline bool page_mapped(struct page *page) +{ + if (likely(!PageCompound(page))) + return atomic_read(&page->_mapcount) >= 0; + return folio_large_is_mapped(page_folio(page)); } -#endif static inline struct page *virt_to_head_page(const void *x) { @@ -929,6 +997,13 @@ static inline void set_compound_page_dtor(struct page *page, page[1].compound_dtor = compound_dtor; } +static inline void folio_set_compound_dtor(struct folio *folio, + enum compound_dtor_id compound_dtor) +{ + VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio); + folio->_folio_dtor = compound_dtor; +} + void destroy_large_folio(struct folio *folio); static inline int head_compound_pincount(struct page *head) @@ -944,6 +1019,22 @@ static inline void set_compound_order(struct page *page, unsigned int order) #endif } +/* + * folio_set_compound_order is generally passed a non-zero order to + * initialize a large folio. However, hugetlb code abuses this by + * passing in zero when 'dissolving' a large folio. + */ +static inline void folio_set_compound_order(struct folio *folio, + unsigned int order) +{ + VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + + folio->_folio_order = order; +#ifdef CONFIG_64BIT + folio->_folio_nr_pages = order ? 1U << order : 0; +#endif +} + /* Returns the number of pages in this potentially compound page. */ static inline unsigned long compound_nr(struct page *page) { @@ -1129,7 +1220,7 @@ static inline void get_page(struct page *page) folio_get(page_folio(page)); } -bool __must_check try_grab_page(struct page *page, unsigned int flags); +int __must_check try_grab_page(struct page *page, unsigned int flags); static inline __must_check bool try_get_page(struct page *page) { @@ -1179,7 +1270,24 @@ static inline void folio_put_refs(struct folio *folio, int refs) __folio_put(folio); } -void release_pages(struct page **pages, int nr); +/** + * release_pages - release an array of pages or folios + * + * This just releases a simple array of multiple pages, and + * accepts various different forms of said page array: either + * a regular old boring array of pages, an array of folios, or + * an array of encoded page pointers. + * + * The transparent union syntax for this kind of "any of these + * argument types" is all kinds of ugly, so look away. + */ +typedef union { + struct page **pages; + struct folio **folios; + struct encoded_page **encoded_pages; +} release_pages_arg __attribute__ ((__transparent_union__)); + +void release_pages(release_pages_arg, int nr); /** * folios_put - Decrement the reference count on an array of folios. @@ -1195,7 +1303,7 @@ void release_pages(struct page **pages, int nr); */ static inline void folios_put(struct folio **folios, unsigned int nr) { - release_pages((struct page **)folios, nr); + release_pages(folios, nr); } static inline void put_page(struct page *page) @@ -1799,9 +1907,6 @@ static inline pgoff_t page_index(struct page *page) return page->index; } -bool page_mapped(struct page *page); -bool folio_mapped(struct folio *folio); - /* * Return true only if the page has been allocated with * ALLOC_NO_WATERMARKS and the low watermark was not @@ -1852,6 +1957,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. 
+ */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? */ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1993,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -2004,6 +2130,22 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma, #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) +int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); +static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma) +{ + /* + * We want to check manually if we can change individual PTEs writable + * if we can't do that automatically for all PTEs in a mapping. For + * private mappings, that's always the case when we have write + * permissions as we properly have to handle COW. + */ + if (vma->vm_flags & VM_SHARED) + return vma_wants_writenotify(vma, vma->vm_page_prot); + return !!(vma->vm_flags & VM_WRITE); + +} +bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); extern unsigned long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot, @@ -2030,40 +2172,30 @@ static inline bool get_user_page_fast_only(unsigned long addr, */ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) { - long val = atomic_long_read(&mm->rss_stat.count[member]); - -#ifdef SPLIT_RSS_COUNTING - /* - * counter is updated in asynchronous manner and may go to minus. - * But it's never be expected number for users. 
- */ - if (val < 0) - val = 0; -#endif - return (unsigned long)val; + return percpu_counter_read_positive(&mm->rss_stat[member]); } -void mm_trace_rss_stat(struct mm_struct *mm, int member, long count); +void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { - long count = atomic_long_add_return(value, &mm->rss_stat.count[member]); + percpu_counter_add(&mm->rss_stat[member], value); - mm_trace_rss_stat(mm, member, count); + mm_trace_rss_stat(mm, member); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { - long count = atomic_long_inc_return(&mm->rss_stat.count[member]); + percpu_counter_inc(&mm->rss_stat[member]); - mm_trace_rss_stat(mm, member, count); + mm_trace_rss_stat(mm, member); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { - long count = atomic_long_dec_return(&mm->rss_stat.count[member]); + percpu_counter_dec(&mm->rss_stat[member]); - mm_trace_rss_stat(mm, member, count); + mm_trace_rss_stat(mm, member); } /* Optimized variant when page is already known not to be PageAnon */ @@ -2153,8 +2285,6 @@ static inline int pte_devmap(pte_t pte) } #endif -int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); - extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, @@ -2403,7 +2533,7 @@ static inline void pgtable_pte_page_dtor(struct page *page) #if USE_SPLIT_PMD_PTLOCKS -static struct page *pmd_to_page(pmd_t *pmd) +static inline struct page *pmd_pgtable_page(pmd_t *pmd) { unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); return virt_to_page((void *)((unsigned long) pmd & mask)); @@ -2411,7 +2541,7 @@ static struct page *pmd_to_page(pmd_t *pmd) static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(pmd_to_page(pmd)); + return ptlock_ptr(pmd_pgtable_page(pmd)); } static inline bool pmd_ptlock_init(struct page *page) @@ -2430,7 +2560,7 @@ static inline void pmd_ptlock_free(struct page *page) ptlock_free(page); } -#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte) +#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte) #else @@ -2950,7 +3080,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, * and return without waiting upon it */ #define FOLL_NOFAULT 0x80 /* do not fault in pages */ #define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_ANON 0x8000 /* don't do file mappings */ @@ -2958,7 +3087,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ #define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ -#define FOLL_INTERRUPTIBLE 0x100000 /* allow interrupts from generic signals */ +#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ +#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each @@ -3043,8 +3173,12 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) * Must be 
called with the (sub)page that's actually referenced via the * page table entry, which might not necessarily be the head page for a * PTE-mapped THP. + * + * If the vma is NULL, we're coming from the GUP-fast path and might have + * to fall back to the slow path just to look up the vma. */ -static inline bool gup_must_unshare(unsigned int flags, struct page *page) +static inline bool gup_must_unshare(struct vm_area_struct *vma, + unsigned int flags, struct page *page) { /* * FOLL_WRITE is implicitly handled correctly as the page table entry * @@ -3057,8 +3191,25 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page) * Note: PageAnon(page) is stable until the page is actually getting * freed. */ - if (!PageAnon(page)) - return false; + if (!PageAnon(page)) { + /* + * We only care about R/O long-term pinning: R/O short-term + * pinning does not have the semantics to observe successive + * changes through the process page tables. + */ + if (!(flags & FOLL_LONGTERM)) + return false; + + /* We really need the vma ... */ + if (!vma) + return true; + + /* + * ... because we only care about writable private ("COW") + * mappings where we have to break COW early. + */ + return is_cow_mapping(vma->vm_flags); + } /* Paired with a memory barrier in page_try_share_anon_rmap(). */ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) @@ -3234,6 +3385,8 @@ void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); +void pmd_init(void *addr); +void pud_init(void *addr); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); @@ -3245,8 +3398,14 @@ struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); +void vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next); +int vmemmap_check_pmd(pmd_t *pmd, int node, + unsigned long addr, unsigned long next); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); +int vmemmap_populate_hugepages(unsigned long start, unsigned long end, + int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); @@ -3269,7 +3428,6 @@ enum mf_flags { int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, unsigned long count, int mf_flags); extern int memory_failure(unsigned long pfn, int flags); -extern void memory_failure_queue(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); extern int sysctl_memory_failure_early_kill; @@ -3278,12 +3436,42 @@ extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); #ifdef CONFIG_MEMORY_FAILURE -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags); +extern void memory_failure_queue(unsigned long pfn, int flags); +extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared); +void num_poisoned_pages_inc(unsigned long pfn); +void num_poisoned_pages_sub(unsigned long pfn, long i); #else -static inline int
__get_huge_page_for_hwpoison(unsigned long pfn, int flags) +static inline void memory_failure_queue(unsigned long pfn, int flags) +{ +} + +static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, + bool *migratable_cleared) { return 0; } + +static inline void num_poisoned_pages_inc(unsigned long pfn) +{ +} + +static inline void num_poisoned_pages_sub(unsigned long pfn, long i) +{ +} +#endif + +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) +extern void memblk_nr_poison_inc(unsigned long pfn); +extern void memblk_nr_poison_sub(unsigned long pfn, long i); +#else +static inline void memblk_nr_poison_inc(unsigned long pfn) +{ +} + +static inline void memblk_nr_poison_sub(unsigned long pfn, long i) +{ +} #endif #ifndef arch_memory_failure @@ -3468,12 +3656,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. - */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 500e536796ca..3b8475007734 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -18,6 +18,7 @@ #include <linux/page-flags-layout.h> #include <linux/workqueue.h> #include <linux/seqlock.h> +#include <linux/percpu_counter.h> #include <asm/mmu.h> @@ -67,7 +68,7 @@ struct mem_cgroup; #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE #define _struct_page_alignment __aligned(2 * sizeof(unsigned long)) #else -#define _struct_page_alignment +#define _struct_page_alignment __aligned(sizeof(unsigned long)) #endif struct page { @@ -103,7 +104,10 @@ struct page { }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; - pgoff_t index; /* Our offset within mapping. */ + union { + pgoff_t index; /* Our offset within mapping. */ + unsigned long share; /* share count for fsdax */ + }; /** * @private: Mapping-private opaque data. * Usually used for buffer_heads if PagePrivate. @@ -141,17 +145,26 @@ struct page { unsigned char compound_dtor; unsigned char compound_order; atomic_t compound_mapcount; + atomic_t subpages_mapcount; atomic_t compound_pincount; #ifdef CONFIG_64BIT unsigned int compound_nr; /* 1 << compound_order */ #endif }; - struct { /* Second tail page of compound page */ + struct { /* Second tail page of transparent huge page */ unsigned long _compound_pad_1; /* compound_head */ unsigned long _compound_pad_2; /* For both global and memcg */ struct list_head deferred_list; }; + struct { /* Second tail page of hugetlb page */ + unsigned long _hugetlb_pad_1; /* compound_head */ + void *hugetlb_subpool; + void *hugetlb_cgroup; + void *hugetlb_cgroup_rsvd; + void *hugetlb_hwpoison; + /* No more space on 32-bit: use third tail if more */ + }; struct { /* Page table pages */ unsigned long _pt_pad_1; /* compound_head */ pgtable_t pmd_huge_pte; /* protected by page->ptl */ @@ -241,6 +254,38 @@ struct page { #endif } _struct_page_alignment; +/* + * struct encoded_page - a nonexistent type marking this pointer + * + * An 'encoded_page' pointer is a pointer to a regular 'struct page', but + * with the low bits of the pointer indicating extra context-dependent + * information. 
Not super-common, but happens in mmu_gather and mlock + handling, and this acts as a type system check on that use. + * + * We only really have two guaranteed bits in general, although you could + * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE) + * for more. + * + * Use the supplied helper functions to encode/decode the pointer and bits. + */ +struct encoded_page; +#define ENCODE_PAGE_BITS 3ul +static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags) +{ + BUILD_BUG_ON(flags > ENCODE_PAGE_BITS); + return (struct encoded_page *)(flags | (unsigned long)page); +} + +static inline unsigned long encoded_page_flags(struct encoded_page *page) +{ + return ENCODE_PAGE_BITS & (unsigned long)page; +} + +static inline struct page *encoded_page_ptr(struct encoded_page *page) +{ + return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page); +} + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. @@ -258,12 +303,19 @@ struct page { * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. * @_flags_1: For large folios, additional page flags. - * @__head: Points to the folio. Do not use. + * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). - * @_total_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_compound_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_subpages_mapcount: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). + * @_flags_2: For alignment. Do not use. + * @_head_2: Points to the folio. Do not use. + * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. + * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. + * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. + * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). * * A folio is a physically, virtually and logically contiguous set * of bytes.
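A minimal sketch of round-tripping a pointer through the encode/decode helpers above; EXAMPLE_PAGE_FLAG is a hypothetical caller-defined bit for illustration, not something this patch adds:

#include <linux/mm_types.h>

/* Hypothetical flag bit; must fit within ENCODE_PAGE_BITS. */
#define EXAMPLE_PAGE_FLAG 0x1ul

static void encoded_page_roundtrip(struct page *page)
{
        /* Pack the flag into the low bits of the pointer ... */
        struct encoded_page *ep = encode_page(page, EXAMPLE_PAGE_FLAG);

        /* ... then recover both the flag and the clean pointer. */
        if (encoded_page_flags(ep) & EXAMPLE_PAGE_FLAG)
                page = encoded_page_ptr(ep);
}
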
It is a power-of-two in size, and it is aligned to that @@ -302,15 +354,32 @@ struct folio { }; struct page page; }; - unsigned long _flags_1; - unsigned long __head; - unsigned char _folio_dtor; - unsigned char _folio_order; - atomic_t _total_mapcount; - atomic_t _pincount; + union { + struct { + unsigned long _flags_1; + unsigned long _head_1; + unsigned char _folio_dtor; + unsigned char _folio_order; + atomic_t _compound_mapcount; + atomic_t _subpages_mapcount; + atomic_t _pincount; #ifdef CONFIG_64BIT - unsigned int _folio_nr_pages; + unsigned int _folio_nr_pages; #endif + }; + struct page __page_1; + }; + union { + struct { + unsigned long _flags_2; + unsigned long _head_2; + void *_hugetlb_subpool; + void *_hugetlb_cgroup; + void *_hugetlb_cgroup_rsvd; + void *_hugetlb_hwpoison; + }; + struct page __page_2; + }; }; #define FOLIO_MATCH(pg, fl) \ @@ -331,15 +400,26 @@ FOLIO_MATCH(memcg_data, memcg_data); static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); -FOLIO_MATCH(compound_head, __head); +FOLIO_MATCH(compound_head, _head_1); FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_order, _folio_order); -FOLIO_MATCH(compound_mapcount, _total_mapcount); +FOLIO_MATCH(compound_mapcount, _compound_mapcount); +FOLIO_MATCH(subpages_mapcount, _subpages_mapcount); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT FOLIO_MATCH(compound_nr, _folio_nr_pages); #endif #undef FOLIO_MATCH +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct folio, fl) == \ + offsetof(struct page, pg) + 2 * sizeof(struct page)) +FOLIO_MATCH(flags, _flags_2); +FOLIO_MATCH(compound_head, _head_2); +FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool); +FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup); +FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd); +FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison); +#undef FOLIO_MATCH static inline atomic_t *folio_mapcount_ptr(struct folio *folio) { @@ -347,11 +427,22 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio) return &tail->compound_mapcount; } +static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio) +{ + struct page *tail = &folio->page + 1; + return &tail->subpages_mapcount; +} + static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; } +static inline atomic_t *subpages_mapcount_ptr(struct page *page) +{ + return &page[1].subpages_mapcount; +} + static inline atomic_t *compound_pincount_ptr(struct page *page) { return &page[1].compound_pincount; @@ -461,21 +552,11 @@ struct vm_area_struct { * For areas with an address space and backing store, * linkage into the address_space->i_mmap interval tree. * - * For private anonymous mappings, a pointer to a null terminated string - * containing the name given to the vma, or NULL if unnamed. */ - - union { - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; - /* - * Serialized by mmap_sem. Never use directly because it is - * valid only when vm_file is NULL. Use anon_vma_name instead. - */ - struct anon_vma_name *anon_name; - }; + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma @@ -496,6 +577,14 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL).
*/ void * vm_private_data; /* was vm_pte (shared mem) */ +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_sem. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif @@ -612,11 +701,7 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - /* - * Special counters, in some configurations protected by the - * page_table_lock, in other configurations by being atomic. - */ - struct mm_rss_stat rss_stat; + struct percpu_counter rss_stat[NR_MM_COUNTERS]; struct linux_binfmt *binfmt; @@ -847,7 +932,6 @@ typedef __bitwise unsigned int vm_fault_t; * @VM_FAULT_OOM: Out Of Memory * @VM_FAULT_SIGBUS: Bad access * @VM_FAULT_MAJOR: Page read from storage - * @VM_FAULT_WRITE: Special case for get_user_pages * @VM_FAULT_HWPOISON: Hit poisoned small page * @VM_FAULT_HWPOISON_LARGE: Hit poisoned large page. Index encoded * in upper bits @@ -868,7 +952,6 @@ enum vm_fault_reason { VM_FAULT_OOM = (__force vm_fault_t)0x000001, VM_FAULT_SIGBUS = (__force vm_fault_t)0x000002, VM_FAULT_MAJOR = (__force vm_fault_t)0x000004, - VM_FAULT_WRITE = (__force vm_fault_t)0x000008, VM_FAULT_HWPOISON = (__force vm_fault_t)0x000010, VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020, VM_FAULT_SIGSEGV = (__force vm_fault_t)0x000040, @@ -894,7 +977,6 @@ enum vm_fault_reason { { VM_FAULT_OOM, "OOM" }, \ { VM_FAULT_SIGBUS, "SIGBUS" }, \ { VM_FAULT_MAJOR, "MAJOR" }, \ - { VM_FAULT_WRITE, "WRITE" }, \ { VM_FAULT_HWPOISON, "HWPOISON" }, \ { VM_FAULT_HWPOISON_LARGE, "HWPOISON_LARGE" }, \ { VM_FAULT_SIGSEGV, "SIGSEGV" }, \ @@ -957,9 +1039,9 @@ typedef struct { * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. - * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to unshare (and mark - * exclusive) a possibly shared anonymous page that is - * mapped R/O. + * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to break COW in a + * COW mapping, making sure that an exclusive anon page is + * mapped after the fault. * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached. * We should only access orig_pte if this flag set. * @@ -984,7 +1066,7 @@ typedef struct { * * The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal. * FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when - * no existing R/O-mapped anonymous page is encountered. + * applied to mappings that are not COW mappings. 
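With mm->rss_stat now an array of percpu counters, the mm counter helpers shown in the mm.h hunk above become thin wrappers around the generic percpu_counter API. A sketch of the resulting update/read pattern, assuming a valid mm (MM_ANONPAGES is one of the NR_MM_COUNTERS indices):

#include <linux/mm.h>
#include <linux/percpu_counter.h>

static void rss_stat_sketch(struct mm_struct *mm)
{
        /* Batched per-CPU update; no global atomic on the fast path. */
        add_mm_counter(mm, MM_ANONPAGES, 1);

        /* Approximate, never-negative read, as get_mm_counter() now does. */
        unsigned long pages =
                percpu_counter_read_positive(&mm->rss_stat[MM_ANONPAGES]);
        (void)pages;
}
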
*/ enum fault_flag { FAULT_FLAG_WRITE = 1 << 0, diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h index 0bb4b6da9993..5414b5c6a103 100644 --- a/include/linux/mm_types_task.h +++ b/include/linux/mm_types_task.h @@ -36,19 +36,6 @@ enum { NR_MM_COUNTERS }; -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) -#define SPLIT_RSS_COUNTING -/* per-thread cached information, */ -struct task_rss_stat { - int events; /* for synchronization threshold */ - int count[NR_MM_COUNTERS]; -}; -#endif /* USE_SPLIT_PTE_PTLOCKS */ - -struct mm_rss_stat { - atomic_long_t count[NR_MM_COUNTERS]; -}; - struct page_frag { struct page *page; #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5f74891556f3..cd28a100d9e4 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -986,6 +986,25 @@ static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } + +/* + * Consecutive zone device pages should not be merged into the same sgl + * or bvec segment with other types of pages or if they belong to different + * pgmaps. Otherwise getting the pgmap of a given segment is not possible + * without scanning the entire segment. This helper returns true either if + * both pages are not zone device pages or both pages are zone device pages + * with the same pgmap. + */ +static inline bool zone_device_pages_have_same_pgmap(const struct page *a, + const struct page *b) +{ + if (is_zone_device_page(a) != is_zone_device_page(b)) + return false; + if (!is_zone_device_page(a)) + return true; + return a->pgmap == b->pgmap; +} + extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else @@ -993,6 +1012,11 @@ static inline bool is_zone_device_page(const struct page *page) { return false; } +static inline bool zone_device_pages_have_same_pgmap(const struct page *a, + const struct page *b) +{ + return true; +} #endif static inline bool folio_is_zone_device(const struct folio *folio) @@ -1200,7 +1224,7 @@ typedef struct pglist_data { /* start time in ms of current promote threshold adjustment period */ unsigned int nbp_th_start; /* - * number of promote candidate pages at stat time of current promote + * number of promote candidate pages at start time of current promote * threshold adjustment period */ unsigned long nbp_th_nr_cand; diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index f6e5369d2928..092c52aa6c2c 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -5,12 +5,10 @@ #include <linux/types.h> #include <linux/uidgid.h> +struct mnt_idmap; struct user_namespace; -/* - * Carries the initial idmapping of 0:0:4294967295 which is an identity - * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is - * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...]. 
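One plausible caller of the new mmzone.h helper is segment-merging logic when building an sgl or bvec; a sketch with illustrative names that are not part of this patch:

#include <linux/mmzone.h>

static bool can_extend_segment(const struct page *prev, const struct page *next)
{
        /*
         * Never mix ZONE_DEVICE pages with normal pages, or pages from
         * different pgmaps, within one segment.
         */
        return zone_device_pages_have_same_pgmap(prev, next);
}
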
- */ + +extern struct mnt_idmap nop_mnt_idmap; extern struct user_namespace init_user_ns; typedef struct { @@ -98,6 +96,26 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid) return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid); } +static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return __vfsuid_val(vfsuid) > __kuid_val(kuid); +} + +static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid) +{ + return __vfsgid_val(vfsgid) > __kgid_val(kgid); +} + +static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid) +{ + return __vfsuid_val(vfsuid) < __kuid_val(kuid); +} + +static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid) +{ + return __vfsgid_val(vfsgid) < __kgid_val(kgid); +} + /* * vfs{g,u}ids are created from k{g,u}ids. * We don't allow them to be created from regular {u,g}id. @@ -208,13 +226,6 @@ static inline vfsuid_t make_vfsuid(struct user_namespace *mnt_userns, return VFSUIDT_INIT(make_kuid(mnt_userns, uid)); } -static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kuid_t kuid) -{ - return AS_KUIDT(make_vfsuid(mnt_userns, fs_userns, kuid)); -} - /** * make_vfsgid - map a filesystem kgid into a mnt_userns * @mnt_userns: the mount's idmapping @@ -253,13 +264,6 @@ static inline vfsgid_t make_vfsgid(struct user_namespace *mnt_userns, return VFSGIDT_INIT(make_kgid(mnt_userns, gid)); } -static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kgid_t kgid) -{ - return AS_KGIDT(make_vfsgid(mnt_userns, fs_userns, kgid)); -} - /** * from_vfsuid - map a vfsuid into the filesystem idmapping * @mnt_userns: the mount's idmapping @@ -288,33 +292,6 @@ static inline kuid_t from_vfsuid(struct user_namespace *mnt_userns, } /** - * mapped_kuid_user - map a user kuid into a mnt_userns - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @kuid : kuid to be mapped - * - * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this - * function when preparing a @kuid to be written to disk or inode. - * - * If no_idmapping() determines that this is not an idmapped mount we can - * simply return @kuid unchanged. - * If initial_idmapping() tells us that the filesystem is not mounted with an - * idmapping we know the value of @kuid won't change when calling - * make_kuid() so we can simply retrieve the value via KUIDT_INIT() - * directly. - * - * Return: @kuid mapped according to @mnt_userns. - * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is - * returned. 
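The new ordered comparison helpers complement the existing equality checks; a sketch of a hypothetical cutoff test on a vfsuid already produced by make_vfsuid():

#include <linux/mnt_idmapping.h>

static bool vfsuid_above_cutoff(vfsuid_t vfsuid, kuid_t cutoff)
{
        /* Only meaningful for a vfsuid that actually has a mapping. */
        return vfsuid_valid(vfsuid) && vfsuid_gt_kuid(vfsuid, cutoff);
}
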
- */ -static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kuid_t kuid) -{ - return from_vfsuid(mnt_userns, fs_userns, VFSUIDT_INIT(kuid)); -} - -/** * vfsuid_has_fsmapping - check whether a vfsuid maps into the filesystem * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping @@ -333,6 +310,12 @@ static inline bool vfsuid_has_fsmapping(struct user_namespace *mnt_userns, return uid_valid(from_vfsuid(mnt_userns, fs_userns, vfsuid)); } +static inline bool vfsuid_has_mapping(struct user_namespace *userns, + vfsuid_t vfsuid) +{ + return from_kuid(userns, AS_KUIDT(vfsuid)) != (uid_t)-1; +} + /** * vfsuid_into_kuid - convert vfsuid into kuid * @vfsuid: the vfsuid to convert @@ -374,33 +357,6 @@ static inline kgid_t from_vfsgid(struct user_namespace *mnt_userns, } /** - * mapped_kgid_user - map a user kgid into a mnt_userns - * @mnt_userns: the mount's idmapping - * @fs_userns: the filesystem's idmapping - * @kgid : kgid to be mapped - * - * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this - * function when preparing a @kgid to be written to disk or inode. - * - * If no_idmapping() determines that this is not an idmapped mount we can - * simply return @kgid unchanged. - * If initial_idmapping() tells us that the filesystem is not mounted with an - * idmapping we know the value of @kgid won't change when calling - * make_kgid() so we can simply retrieve the value via KGIDT_INIT() - * directly. - * - * Return: @kgid mapped according to @mnt_userns. - * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is - * returned. - */ -static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - kgid_t kgid) -{ - return from_vfsgid(mnt_userns, fs_userns, VFSGIDT_INIT(kgid)); -} - -/** * vfsgid_has_fsmapping - check whether a vfsgid maps into the filesystem * @mnt_userns: the mount's idmapping * @fs_userns: the filesystem's idmapping @@ -419,6 +375,12 @@ static inline bool vfsgid_has_fsmapping(struct user_namespace *mnt_userns, return gid_valid(from_vfsgid(mnt_userns, fs_userns, vfsgid)); } +static inline bool vfsgid_has_mapping(struct user_namespace *userns, + vfsgid_t vfsgid) +{ + return from_kgid(userns, AS_KGIDT(vfsgid)) != (gid_t)-1; +} + /** * vfsgid_into_kgid - convert vfsgid into kgid * @vfsgid: the vfsgid to convert diff --git a/include/linux/module.h b/include/linux/module.h index ec61fb53979a..8c5909c0076c 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -827,7 +827,6 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr) #ifdef CONFIG_SYSFS extern struct kset *module_kset; extern struct kobj_type module_ktype; -extern int module_sysfs_initialized; #endif /* CONFIG_SYSFS */ #define symbol_request(x) try_then_request_module(symbol_get(x), "symbol:" #x) @@ -879,8 +878,17 @@ static inline bool module_sig_ok(struct module *module) } #endif /* CONFIG_MODULE_SIG */ +#if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS) int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); +#else +static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, + struct module *, unsigned long), + void *data) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ #endif /* _LINUX_MODULE_H */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 55a4abaf6715..62475996fac6 100644 --- 
a/include/linux/mount.h +++ b/include/linux/mount.h @@ -16,6 +16,7 @@ struct super_block; struct dentry; struct user_namespace; +struct mnt_idmap; struct file_system_type; struct fs_context; struct file; @@ -70,13 +71,15 @@ struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ struct super_block *mnt_sb; /* pointer to superblock */ int mnt_flags; - struct user_namespace *mnt_userns; + struct mnt_idmap *mnt_idmap; } __randomize_layout; -static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt) +struct user_namespace *mnt_user_ns(const struct vfsmount *mnt); +struct user_namespace *mnt_idmap_owner(const struct mnt_idmap *idmap); +static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). */ - return smp_load_acquire(&mnt->mnt_userns); + return smp_load_acquire(&mnt->mnt_idmap); } extern int mnt_want_write(struct vfsmount *mnt); diff --git a/include/linux/msi.h b/include/linux/msi.h index fc918a658d48..a112b913fff9 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -13,13 +13,20 @@ * * Regular device drivers have no business with any of these functions and * especially storing MSI descriptor pointers in random code is considered - * abuse. The only function which is relevant for drivers is msi_get_virq(). + * abuse. + * + * Device driver relevant functions are available in <linux/msi_api.h> */ +#include <linux/irqdomain_defs.h> #include <linux/cpumask.h> +#include <linux/msi_api.h> #include <linux/xarray.h> #include <linux/mutex.h> #include <linux/list.h> +#include <linux/irq.h> +#include <linux/bits.h> + #include <asm/msi.h> /* Dummy shadow structures if an architecture does not define them */ @@ -68,19 +75,18 @@ struct msi_msg { extern int pci_msi_ignore_mask; /* Helper functions */ -struct irq_data; struct msi_desc; struct pci_dev; struct platform_msi_priv_data; struct device_attribute; +struct irq_domain; +struct irq_affinity_desc; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); #else -static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) -{ -} +static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) { } #endif typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, @@ -120,6 +126,38 @@ struct pci_msi_desc { }; }; +/** + * union msi_domain_cookie - Opaque MSI domain specific data + * @value: u64 value store + * @ptr: Pointer to domain specific data + * @iobase: Domain specific IOmem pointer + * + * The content of this data is implementation defined and used by the MSI + * domain to store domain specific information which is required for + * interrupt chip callbacks. + */ +union msi_domain_cookie { + u64 value; + void *ptr; + void __iomem *iobase; +}; + +/** + * struct msi_desc_data - Generic MSI descriptor data + * @dcookie: Cookie for MSI domain specific data which is required + * for irq_chip callbacks + * @icookie: Cookie for the MSI interrupt instance provided by + * the usage site to the allocation function + * + * The content of this data is implementation defined, e.g. PCI/IMS + * implementations define the meaning of the data. The MSI core ignores + * this data completely.
+ */ +struct msi_desc_data { + union msi_domain_cookie dcookie; + union msi_instance_cookie icookie; +}; + #define MSI_MAX_INDEX ((unsigned int)USHRT_MAX) /** @@ -137,6 +175,7 @@ struct pci_msi_desc { * * @msi_index: Index of the msi descriptor * @pci: PCI specific msi descriptor data + * @data: Generic MSI descriptor data */ struct msi_desc { /* Shared device/bus type independent data */ @@ -156,7 +195,10 @@ struct msi_desc { void *write_msi_msg_data; u16 msi_index; - struct pci_msi_desc pci; + union { + struct pci_msi_desc pci; + struct msi_desc_data data; + }; }; /* @@ -171,33 +213,80 @@ enum msi_desc_filter { MSI_DESC_ASSOCIATED, }; + +/** + * struct msi_dev_domain - The internals of MSI domain info per device + * @store: Xarray for storing MSI descriptor pointers + * @irqdomain: Pointer to a per device interrupt domain + */ +struct msi_dev_domain { + struct xarray store; + struct irq_domain *domain; +}; + /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers * @platform_data: Platform-MSI specific data * @mutex: Mutex protecting the MSI descriptor store - * @__store: Xarray for storing MSI descriptor pointers + * @__domains: Internal data for per device MSI domains * @__iter_idx: Index to search the next entry for iterators */ struct msi_device_data { unsigned long properties; struct platform_msi_priv_data *platform_data; struct mutex mutex; - struct xarray __store; + struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; unsigned long __iter_idx; }; int msi_setup_device_data(struct device *dev); -unsigned int msi_get_virq(struct device *dev, unsigned int index); void msi_lock_descs(struct device *dev); void msi_unlock_descs(struct device *dev); -struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter); -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); +struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter); /** - * msi_for_each_desc - Iterate the MSI descriptors + * msi_first_desc - Get the first MSI descriptor of the default irqdomain + * @dev: Device to operate on + * @filter: Descriptor state filter + * + * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() + * must be invoked before the call. + * + * Return: Pointer to the first MSI descriptor matching the search + * criteria, NULL if none found. + */ +static inline struct msi_desc *msi_first_desc(struct device *dev, + enum msi_desc_filter filter) +{ + return msi_domain_first_desc(dev, MSI_DEFAULT_DOMAIN, filter); +} + +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter); + +/** + * msi_domain_for_each_desc - Iterate the MSI descriptors in a specific domain + * + * @desc: struct msi_desc pointer used as iterator + * @dev: struct device pointer - device to iterate + * @domid: The id of the interrupt domain which should be walked. + * @filter: Filter for descriptor selection + * + * Notes: + * - The loop must be protected with a msi_lock_descs()/msi_unlock_descs() + * pair. + * - It is safe to remove a retrieved MSI descriptor in the loop. 
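Usage of the iterator stays as before, only the domain selection is new; a sketch of walking all descriptors of the default domain under the descriptor mutex:

#include <linux/device.h>
#include <linux/msi.h>

static void log_msi_descs(struct device *dev)
{
        struct msi_desc *desc;

        msi_lock_descs(dev);
        /* Expands to msi_domain_for_each_desc() on MSI_DEFAULT_DOMAIN. */
        msi_for_each_desc(desc, dev, MSI_DESC_ALL)
                dev_info(dev, "index %u -> irq %u\n",
                         desc->msi_index, desc->irq);
        msi_unlock_descs(dev);
}
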
+ */ +#define msi_domain_for_each_desc(desc, dev, domid, filter) \ + for ((desc) = msi_domain_first_desc((dev), (domid), (filter)); (desc); \ + (desc) = msi_next_desc((dev), (domid), (filter))) + +/** + * msi_for_each_desc - Iterate the MSI descriptors in the default irqdomain * * @desc: struct msi_desc pointer used as iterator * @dev: struct device pointer - device to iterate @@ -208,9 +297,8 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); * pair. * - It is safe to remove a retrieved MSI descriptor in the loop. */ -#define msi_for_each_desc(desc, dev, filter) \ - for ((desc) = msi_first_desc((dev), (filter)); (desc); \ - (desc) = msi_next_desc((dev), (filter))) +#define msi_for_each_desc(desc, dev, filter) \ + msi_domain_for_each_desc((desc), (dev), MSI_DEFAULT_DOMAIN, (filter)) #define msi_desc_to_dev(desc) ((desc)->dev) @@ -237,34 +325,47 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, } #endif -#ifdef CONFIG_PCI_MSI -struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); -void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); -#else /* CONFIG_PCI_MSI */ -static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) +int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, + struct msi_desc *init_desc); +/** + * msi_insert_msi_desc - Allocate and initialize a MSI descriptor in the + * default irqdomain and insert it at @init_desc->msi_index + * @dev: Pointer to the device for which the descriptor is allocated + * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor + * + * Return: 0 on success or an appropriate failure code. + */ +static inline int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc) { + return msi_domain_insert_msi_desc(dev, MSI_DEFAULT_DOMAIN, init_desc); } -#endif /* CONFIG_PCI_MSI */ -int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); -void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, - unsigned int first_index, unsigned int last_index); +void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); + +/** + * msi_free_msi_descs_range - Free a range of MSI descriptors of a device + * in the default irqdomain + * + * @dev: Device for which to free the descriptors + * @first: Index to start freeing from (inclusive) + * @last: Last index to be freed (inclusive) + */ +static inline void msi_free_msi_descs_range(struct device *dev, unsigned int first, + unsigned int last) +{ + msi_domain_free_msi_descs_range(dev, MSI_DEFAULT_DOMAIN, first, last); } /** - * msi_free_msi_descs - Free MSI descriptors of a device + * msi_free_msi_descs - Free all MSI descriptors of a device in the default irqdomain * @dev: Device to free the descriptors */ static inline void msi_free_msi_descs(struct device *dev) { - msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX); + msi_free_msi_descs_range(dev, 0, MSI_MAX_INDEX); } -void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); -void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); - -void pci_msi_mask_irq(struct irq_data *data); -void pci_msi_unmask_irq(struct irq_data *data); - /* * The arch hooks to set up msi irqs.
Default functions are implemented * as weak symbols so that they /can/ be overridden by architecture specific @@ -293,7 +394,7 @@ static inline void msi_device_destroy_sysfs(struct device *dev) { } */ bool arch_restore_msi_irqs(struct pci_dev *dev); -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ #include <linux/irqhandler.h> @@ -309,19 +410,22 @@ struct msi_domain_info; * @get_hwirq: Retrieve the resulting hw irq number * @msi_init: Domain specific init function for MSI interrupts * @msi_free: Domain specific function to free a MSI interrupts - * @msi_check: Callback for verification of the domain/info/dev data * @msi_prepare: Prepare the allocation of the interrupts in the domain + * @prepare_desc: Optional function to prepare the allocated MSI descriptor + * in the domain * @set_desc: Set the msi descriptor for an interrupt * @domain_alloc_irqs: Optional function to override the default allocation * function. * @domain_free_irqs: Optional function to override the default free * function. + * @msi_post_free: Optional function which is invoked after freeing + * all interrupts. * * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare and @set_desc are callbacks used by - * msi_domain_alloc/free_irqs(). + * @msi_check, @msi_prepare, @prepare_desc and @set_desc are callbacks used by the + * msi_domain_alloc/free_irqs*() variants. * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). This @@ -329,15 +433,6 @@ struct msi_domain_info; * be wrapped into the regular irq domains concepts by mere mortals. This * allows to universally use msi_domain_alloc/free_irqs without having to * special case XEN all over the place. - * - * Contrary to other operations @domain_alloc_irqs and @domain_free_irqs - * are set to the default implementation if NULL and even when - * MSI_FLAG_USE_DEF_DOM_OPS is not set to avoid breaking existing users and - * because these callbacks are obviously mandatory. - * - * This is NOT meant to be abused, but it can be useful to build wrappers - * for specialized MSI irq domains which need extra work before and after - * calling __msi_domain_alloc_irqs()/__msi_domain_free_irqs(). */ struct msi_domain_ops { irq_hw_number_t (*get_hwirq)(struct msi_domain_info *info, @@ -349,23 +444,29 @@ struct msi_domain_ops { void (*msi_free)(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq); - int (*msi_check)(struct irq_domain *domain, - struct msi_domain_info *info, - struct device *dev); int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); + void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg, + struct msi_desc *desc); void (*set_desc)(msi_alloc_info_t *arg, struct msi_desc *desc); int (*domain_alloc_irqs)(struct irq_domain *domain, struct device *dev, int nvec); void (*domain_free_irqs)(struct irq_domain *domain, struct device *dev); + void (*msi_post_free)(struct irq_domain *domain, + struct device *dev); }; /** * struct msi_domain_info - MSI interrupt domain data * @flags: Flags to describe features and capabilities + * @bus_token: The domain bus token + * @hwsize: The hardware table size or the software index limit. + * If 0 then the size is considered unlimited and + * gets initialized to the maximum software index limit + * by the domain creation code.
* @ops: The callback data structure * @chip: Optional: associated interrupt chip * @chip_data: Optional: associated interrupt chip data @@ -375,17 +476,42 @@ struct msi_domain_ops { * @data: Optional: domain specific data */ struct msi_domain_info { - u32 flags; - struct msi_domain_ops *ops; - struct irq_chip *chip; - void *chip_data; - irq_flow_handler_t handler; - void *handler_data; - const char *handler_name; - void *data; + u32 flags; + enum irq_domain_bus_token bus_token; + unsigned int hwsize; + struct msi_domain_ops *ops; + struct irq_chip *chip; + void *chip_data; + irq_flow_handler_t handler; + void *handler_data; + const char *handler_name; + void *data; }; -/* Flags for msi_domain_info */ +/** + * struct msi_domain_template - Template for MSI device domains + * @name: Storage for the resulting name. Filled in by the core. + * @chip: Interrupt chip for this domain + * @ops: MSI domain ops + * @info: MSI domain info data + */ +struct msi_domain_template { + char name[48]; + struct irq_chip chip; + struct msi_domain_ops ops; + struct msi_domain_info info; +}; + +/* + * Flags for msi_domain_info + * + * Bit 0-15: Generic MSI functionality which is not subject to restriction + * by parent domains + * + * Bit 16-31: Functionality which depends on the underlying parent domain and + * can be masked out by msi_parent_ops::init_dev_msi_info() when + * a device MSI domain is initialized. + */ enum { /* * Init non implemented ops callbacks with default MSI domain @@ -397,44 +523,100 @@ enum { * callbacks. */ MSI_FLAG_USE_DEF_CHIP_OPS = (1 << 1), - /* Support multiple PCI MSI interrupts */ - MSI_FLAG_MULTI_PCI_MSI = (1 << 2), - /* Support PCI MSIX interrupts */ - MSI_FLAG_PCI_MSIX = (1 << 3), /* Needs early activate, required for PCI */ - MSI_FLAG_ACTIVATE_EARLY = (1 << 4), + MSI_FLAG_ACTIVATE_EARLY = (1 << 2), /* * Must reactivate when irq is started even when * MSI_FLAG_ACTIVATE_EARLY has been set. */ - MSI_FLAG_MUST_REACTIVATE = (1 << 5), - /* Is level-triggered capable, using two messages */ - MSI_FLAG_LEVEL_CAPABLE = (1 << 6), + MSI_FLAG_MUST_REACTIVATE = (1 << 3), /* Populate sysfs on alloc() and destroy it on free() */ - MSI_FLAG_DEV_SYSFS = (1 << 7), - /* MSI-X entries must be contiguous */ - MSI_FLAG_MSIX_CONTIGUOUS = (1 << 8), + MSI_FLAG_DEV_SYSFS = (1 << 4), /* Allocate simple MSI descriptors */ - MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 9), + MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ - MSI_FLAG_FREE_MSI_DESCS = (1 << 10), + MSI_FLAG_FREE_MSI_DESCS = (1 << 6), + /* + * Quirk to handle MSI implementations which do not provide + * masking. Currently known to affect x86, but has to be partially + * handled in the core MSI code. 
+ */ + MSI_FLAG_NOMASK_QUIRK = (1 << 7), + + /* Mask for the generic functionality */ + MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), + + /* Mask for the domain specific functionality */ + MSI_DOMAIN_FLAGS_MASK = GENMASK(31, 16), + + /* Support multiple PCI MSI interrupts */ + MSI_FLAG_MULTI_PCI_MSI = (1 << 16), + /* Support PCI MSIX interrupts */ + MSI_FLAG_PCI_MSIX = (1 << 17), + /* Is level-triggered capable, using two messages */ + MSI_FLAG_LEVEL_CAPABLE = (1 << 18), + /* MSI-X entries must be contiguous */ + MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), + /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ + MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), + /* Support for PCI/IMS */ + MSI_FLAG_PCI_IMS = (1 << 21), }; +/** + * struct msi_parent_ops - MSI parent domain callbacks and configuration info + * + * @supported_flags: Required: The supported MSI flags of the parent domain + * @prefix: Optional: Prefix for the domain and chip name + * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent + * domain specific domain flags, domain ops and interrupt chip + * callbacks when a per device domain is created. + */ +struct msi_parent_ops { + u32 supported_flags; + const char *prefix; + bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info); +}; + +bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info); + int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force); struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec); -int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, - int nvec); -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec); -void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); -void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); + +bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, + const struct msi_domain_template *template, + unsigned int hwsize, void *domain_data, + void *chip_data); +void msi_remove_device_irq_domain(struct device *dev, unsigned int domid); + +bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, + enum irq_domain_bus_token bus_token); + +int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs); + +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *cookie); + +void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid); +void msi_domain_free_irqs_all(struct device 
*dev, unsigned int domid); + struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, @@ -467,20 +649,27 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); -#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ +#endif /* CONFIG_GENERIC_MSI_IRQ */ -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +/* PCI specific interfaces */ +#ifdef CONFIG_PCI_MSI +struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); +void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); +void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +void pci_msi_mask_irq(struct irq_data *data); +void pci_msi_unmask_irq(struct irq_data *data); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); -bool pci_dev_has_special_msi_domain(struct pci_dev *pdev); -#else +#else /* CONFIG_PCI_MSI */ static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { return NULL; } -#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ +static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) { } +#endif /* !CONFIG_PCI_MSI */ #endif /* LINUX_MSI_H */ diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h new file mode 100644 index 000000000000..391087ad99b1 --- /dev/null +++ b/include/linux/msi_api.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_MSI_API_H +#define LINUX_MSI_API_H + +/* + * APIs which are relevant for device driver code for allocating and + * freeing MSI interrupts and querying the associations between + * hardware/software MSI indices and the Linux interrupt number. + */ + +struct device; + +/* + * Per device interrupt domain related constants. + */ +enum msi_domain_ids { + MSI_DEFAULT_DOMAIN, + MSI_SECONDARY_DOMAIN, + MSI_MAX_DEVICE_IRQDOMAINS, +}; + +/** + * union msi_instance_cookie - MSI instance cookie + * @value: u64 value store + * @ptr: Pointer to usage site specific data + * + * This cookie is handed to the IMS allocation function and stored in the + * MSI descriptor for the interrupt chip callbacks. + * + * The content of this cookie is MSI domain implementation defined. For + * PCI/IMS implementations this could be a PASID or a pointer to queue + * memory. + */ +union msi_instance_cookie { + u64 value; + void *ptr; +}; + +/** + * msi_map - Mapping between MSI index and Linux interrupt number + * @index: The MSI index, e.g. slot in the MSI-X table or + * a software managed index if >= 0. If negative + * the allocation function failed and it contains + * the error code. + * @virq: The associated Linux interrupt number + */ +struct msi_map { + int index; + int virq; +}; + +/* + * Constant to be used for dynamic allocations when the allocation is any + * free MSI index, which is either an entry in a hardware table or a + * software managed index. 
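Tying the pieces together, a sketch of a dynamic allocation through the new per-device domain API; the queue pointer stored in the instance cookie is an assumption of the example, and MSI_ANY_INDEX is the "any free index" constant defined just below:

#include <linux/msi.h>

static int alloc_dynamic_msi(struct device *dev, void *queue)
{
        union msi_instance_cookie icookie = { .ptr = queue };
        struct msi_map map;

        map = msi_domain_alloc_irq_at(dev, MSI_SECONDARY_DOMAIN,
                                      MSI_ANY_INDEX, NULL, &icookie);
        if (map.index < 0)
                return map.index;       /* negative error code */

        return map.virq;                /* Linux interrupt number */
}
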
+ */ +#define MSI_ANY_INDEX UINT_MAX + +unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index); + +/** + * msi_get_virq - Lookup the Linux interrupt number for a MSI index on the default interrupt domain + * @dev: Device for which the lookup happens + * @index: The MSI index to lookup + * + * Return: The Linux interrupt number on success (> 0), 0 if not found + */ +static inline unsigned int msi_get_virq(struct device *dev, unsigned int index) +{ + return msi_domain_get_virq(dev, MSI_DEFAULT_DOMAIN, index); +} + +#endif diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c3693bb87b4c..b2996dc987ff 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -999,7 +999,6 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, bool nanddev_isbad(struct nand_device *nand, const struct nand_pos *pos); bool nanddev_isreserved(struct nand_device *nand, const struct nand_pos *pos); -int nanddev_erase(struct nand_device *nand, const struct nand_pos *pos); int nanddev_markbad(struct nand_device *nand, const struct nand_pos *pos); /* ECC related functions */ diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 42218a1164f6..25765556223a 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -349,6 +349,8 @@ struct spi_nor_flash_parameter; * @bouncebuf: bounce buffer used when the buffer passed by the MTD * layer is not DMA-able * @bouncebuf_size: size of the bounce buffer + * @id: The flash's ID bytes. Always contains + * SPI_NOR_MAX_ID_LEN bytes. * @info: SPI NOR part JEDEC MFR ID and other info * @manufacturer: SPI NOR manufacturer * @addr_nbytes: number of address bytes @@ -379,6 +381,7 @@ struct spi_nor { struct spi_mem *spimem; u8 *bouncebuf; size_t bouncebuf_size; + u8 *id; const struct flash_info *info; const struct spi_nor_manufacturer *manufacturer; u8 addr_nbytes; diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 3682ae75c7aa..145169be2ed8 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -8,6 +8,7 @@ #include <linux/mbus.h> #include <linux/if_ether.h> +#include <linux/phy.h> #define MV643XX_ETH_SHARED_NAME "mv643xx_eth" #define MV643XX_ETH_NAME "mv643xx_eth_port" @@ -59,6 +60,7 @@ struct mv643xx_eth_platform_data { */ int speed; int duplex; + phy_interface_t interface; /* * How many RX/TX queues to use. diff --git a/include/linux/net.h b/include/linux/net.h index 18d942bbdf6e..b73ad8e3c212 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -42,6 +42,7 @@ struct net; #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 #define SOCK_SUPPORT_ZC 5 +#define SOCK_CUSTOM_SOCKOPT 6 #ifndef ARCH_HAS_SOCKET_TYPES /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eddf8ee270e7..aad12a179e54 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -78,6 +78,7 @@ struct xdp_buff; void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); +void netdev_sw_irq_coalesce_default_on(struct net_device *dev); /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ @@ -171,31 +172,38 @@ static inline bool dev_xmit_complete(int rc) * (unsigned long) so they can be read and written atomically. 
*/ +#define NET_DEV_STAT(FIELD) \ + union { \ + unsigned long FIELD; \ + atomic_long_t __##FIELD; \ + } + struct net_device_stats { - unsigned long rx_packets; - unsigned long tx_packets; - unsigned long rx_bytes; - unsigned long tx_bytes; - unsigned long rx_errors; - unsigned long tx_errors; - unsigned long rx_dropped; - unsigned long tx_dropped; - unsigned long multicast; - unsigned long collisions; - unsigned long rx_length_errors; - unsigned long rx_over_errors; - unsigned long rx_crc_errors; - unsigned long rx_frame_errors; - unsigned long rx_fifo_errors; - unsigned long rx_missed_errors; - unsigned long tx_aborted_errors; - unsigned long tx_carrier_errors; - unsigned long tx_fifo_errors; - unsigned long tx_heartbeat_errors; - unsigned long tx_window_errors; - unsigned long rx_compressed; - unsigned long tx_compressed; + NET_DEV_STAT(rx_packets); + NET_DEV_STAT(tx_packets); + NET_DEV_STAT(rx_bytes); + NET_DEV_STAT(tx_bytes); + NET_DEV_STAT(rx_errors); + NET_DEV_STAT(tx_errors); + NET_DEV_STAT(rx_dropped); + NET_DEV_STAT(tx_dropped); + NET_DEV_STAT(multicast); + NET_DEV_STAT(collisions); + NET_DEV_STAT(rx_length_errors); + NET_DEV_STAT(rx_over_errors); + NET_DEV_STAT(rx_crc_errors); + NET_DEV_STAT(rx_frame_errors); + NET_DEV_STAT(rx_fifo_errors); + NET_DEV_STAT(rx_missed_errors); + NET_DEV_STAT(tx_aborted_errors); + NET_DEV_STAT(tx_carrier_errors); + NET_DEV_STAT(tx_fifo_errors); + NET_DEV_STAT(tx_heartbeat_errors); + NET_DEV_STAT(tx_window_errors); + NET_DEV_STAT(rx_compressed); + NET_DEV_STAT(tx_compressed); }; +#undef NET_DEV_STAT /* per-cpu stats, allocated on demand. * Try to fit them in a single cache line, for dev_get_stats() sake. @@ -1033,6 +1041,10 @@ struct xfrmdev_ops { bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); + void (*xdo_dev_state_update_curlft) (struct xfrm_state *x); + int (*xdo_dev_policy_add) (struct xfrm_policy *x); + void (*xdo_dev_policy_delete) (struct xfrm_policy *x); + void (*xdo_dev_policy_free) (struct xfrm_policy *x); }; #endif @@ -1366,10 +1378,6 @@ struct netdev_net_notifier { * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. - * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev); - * Get devlink port instance associated with a given netdev. - * Called with a reference on the netdevice and devlink locks only, - * rtnl_lock is not held. * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. 
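Since every net_device_stats field now aliases an atomic_long_t of identical size, error paths can bump the legacy counters locklessly while readers keep seeing plain unsigned longs; the DEV_STATS_INC()/DEV_STATS_ADD() wrappers added near the end of this header are the intended interface. A sketch of a driver drop path:

#include <linux/netdevice.h>

static void drop_rx_packet(struct net_device *dev)
{
        /* Atomic update through the __rx_dropped alias of rx_dropped. */
        DEV_STATS_INC(dev, rx_dropped);
}
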
@@ -1600,7 +1608,6 @@ struct net_device_ops { struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); - struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); @@ -1655,7 +1662,7 @@ struct net_device_ops { * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device - * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running + * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) * @IFF_CHANGE_PROTO_DOWN: device supports setting carrier via IFLA_PROTO_DOWN @@ -1691,7 +1698,7 @@ enum netdev_priv_flags { IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, - IFF_LIVE_RENAME_OK = 1<<30, + IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; @@ -1726,7 +1733,6 @@ enum netdev_priv_flags { #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER -#define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR /* Specifies the type of the struct net_device::ml_priv pointer */ @@ -1999,6 +2005,11 @@ enum netdev_ml_priv_type { * registered * @offload_xstats_l3: L3 HW stats for this netdevice. * + * @devlink_port: Pointer to related devlink port structure. + * Assigned by a driver before netdev registration using + * SET_NETDEV_DEVLINK_PORT macro. This pointer is static + * during the time netdevice is registered. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2349,9 +2360,22 @@ struct net_device { netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; struct rtnl_hw_stats64 *offload_xstats_l3; + + struct devlink_port *devlink_port; }; #define to_net_dev(d) container_of(d, struct net_device, dev) +/* + * Driver should use this to assign devlink port instance to a netdevice + * before it registers the netdevice. Therefore devlink_port is static + * during the netdev lifetime after it is registered. 
+ */ +#define SET_NETDEV_DEVLINK_PORT(dev, port) \ +({ \ + WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \ + ((dev)->devlink_port = (port)); \ +}) + static inline bool netif_elide_gro(const struct net_device *dev) { if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) @@ -2785,6 +2809,7 @@ enum netdev_cmd { NETDEV_PRE_TYPE_CHANGE, NETDEV_POST_TYPE_CHANGE, NETDEV_POST_INIT, + NETDEV_PRE_UNINIT, NETDEV_RELEASE, NETDEV_NOTIFY_PEERS, NETDEV_JOIN, @@ -2814,6 +2839,8 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); +void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, + struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); @@ -3114,7 +3141,6 @@ struct softnet_data { /* stats */ unsigned int processed; unsigned int time_squeeze; - unsigned int received_rps; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; #endif @@ -3147,6 +3173,7 @@ struct softnet_data { unsigned int cpu; unsigned int input_queue_tail; #endif + unsigned int received_rps; unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; @@ -3855,8 +3882,6 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); -void __dev_notify_flags(struct net_device *, unsigned int old_flags, - unsigned int gchanges); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, @@ -5101,11 +5126,6 @@ static inline const char *netdev_name(const struct net_device *dev) return dev->name; } -static inline bool netdev_unregistering(const struct net_device *dev) -{ - return dev->reg_state == NETREG_UNREGISTERING; -} - static inline const char *netdev_reg_state(const struct net_device *dev) { switch (dev->reg_state) { @@ -5164,4 +5184,9 @@ extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; +/* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. 
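A sketch of the intended call order for SET_NETDEV_DEVLINK_PORT() above: the devlink port is attached while the netdev is still unregistered, then stays fixed for its registered lifetime (function and parameter names are illustrative):

#include <linux/netdevice.h>

static int register_with_devlink_port(struct net_device *dev,
                                      struct devlink_port *port)
{
        /* Must run while dev->reg_state == NETREG_UNINITIALIZED. */
        SET_NETDEV_DEVLINK_PORT(dev, port);

        return register_netdev(dev);
}
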
*/ +#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) +#define DEV_STATS_ADD(DEV, FIELD, VAL) \ + atomic_long_add((VAL), &(DEV)->stats.__##FIELD) + #endif /* _LINUX_NETDEVICE_H */ diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ada1296c87d5..ab934ad951a8 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -515,6 +515,16 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, *skbinfo = ext->skbinfo; } +static inline void +nf_inet_addr_mask_inplace(union nf_inet_addr *a1, + const union nf_inet_addr *mask) +{ + a1->all[0] &= mask->all[0]; + a1->all[1] &= mask->all[1]; + a1->all[2] &= mask->all[2]; + a1->all[3] &= mask->all[3]; +} + #define IP_SET_INIT_KEXT(skb, opt, set) \ { .bytes = (skb)->len, .packets = 1, .target = true,\ .timeout = ip_set_adt_opt_timeout(opt, set) } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index f2402ddeafbf..4c76ddfb6a67 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -267,6 +267,14 @@ struct netfs_cache_ops { loff_t *_start, size_t *_len, loff_t i_size, bool no_space_allocated_yet); + /* Prepare an on-demand read operation, shortening it to a cached/uncached + * boundary as appropriate. + */ + enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres, + loff_t start, size_t *_len, + loff_t i_size, + unsigned long *_flags, ino_t ino); + /* Query the occupancy of the cache in a region, returning where the * next chunk of data starts and how long it is. */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d51e041d2242..d81bde5a5844 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -64,6 +64,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) /* this can be increased when necessary - don't expose to userland */ #define NETLINK_MAX_COOKIE_LEN 20 +#define NETLINK_MAX_FMTMSG_LEN 80 /** * struct netlink_ext_ack - netlink extended ACK report struct @@ -75,6 +76,8 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) * @miss_nest: nest missing an attribute (%NULL if missing top level attr) * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length + * @_msg_buf: output buffer for formatted message strings - don't access + * directly, use %NL_SET_ERR_MSG_FMT */ struct netlink_ext_ack { const char *_msg; @@ -84,13 +87,13 @@ struct netlink_ext_ack { u16 miss_type; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; + char _msg_buf[NETLINK_MAX_FMTMSG_LEN]; }; /* Always use this macro, this allows later putting the * message into a separate section or such for things * like translation or listing all possible messages. - * Currently string formatting is not supported (due - * to the lack of an output buffer.) + * If string formatting is needed use NL_SET_ERR_MSG_FMT. */ #define NL_SET_ERR_MSG(extack, msg) do { \ static const char __msg[] = msg; \ @@ -102,9 +105,31 @@ struct netlink_ext_ack { __extack->_msg = __msg; \ } while (0) +/* We splice fmt with %s at each end even in the snprintf so that both calls + * can use the same string constant, avoiding its duplication in .ro + */ +#define NL_SET_ERR_MSG_FMT(extack, fmt, args...) 
do { \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (!__extack) \ + break; \ + if (snprintf(__extack->_msg_buf, NETLINK_MAX_FMTMSG_LEN, \ + "%s" fmt "%s", "", ##args, "") >= \ + NETLINK_MAX_FMTMSG_LEN) \ + net_warn_ratelimited("%s" fmt "%s", "truncated extack: ", \ + ##args, "\n"); \ + \ + do_trace_netlink_extack(__extack->_msg_buf); \ + \ + __extack->_msg = __extack->_msg_buf; \ +} while (0) + #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) +#define NL_SET_ERR_MSG_FMT_MOD(extack, fmt, args...) \ + NL_SET_ERR_MSG_FMT((extack), KBUILD_MODNAME ": " fmt, ##args) + #define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ if ((extack)) { \ (extack)->bad_attr = (attr); \ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d04b6a5964c..730003c4f4af 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -732,4 +732,17 @@ enum nfs4_setxattr_options { SETXATTR4_CREATE = 1, SETXATTR4_REPLACE = 2, }; + +enum { + RCA4_TYPE_MASK_RDATA_DLG = 0, + RCA4_TYPE_MASK_WDATA_DLG = 1, + RCA4_TYPE_MASK_DIR_DLG = 2, + RCA4_TYPE_MASK_FILE_LAYOUT = 3, + RCA4_TYPE_MASK_BLK_LAYOUT = 4, + RCA4_TYPE_MASK_OBJ_LAYOUT_MIN = 8, + RCA4_TYPE_MASK_OBJ_LAYOUT_MAX = 9, + RCA4_TYPE_MASK_OTHER_LAYOUT_MIN = 12, + RCA4_TYPE_MASK_OTHER_LAYOUT_MAX = 15, +}; + #endif diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7931fa472561..d92fdfd2444c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -59,6 +59,7 @@ struct nfs_access_entry { kuid_t fsuid; kgid_t fsgid; struct group_info *group_info; + u64 timestamp; __u32 mask; struct rcu_head rcu_head; }; diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index efef68c9352a..bb0ee80526b2 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -516,7 +516,7 @@ static inline int node_random(const nodemask_t *maskp) bit = first_node(*maskp); break; default: - bit = find_nth_bit(maskp->bits, MAX_NUMNODES, prandom_u32_max(w)); + bit = find_nth_bit(maskp->bits, MAX_NUMNODES, get_random_u32_below(w)); break; } return bit; diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cdb171efc7cb..fee881cded01 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -94,6 +94,7 @@ static inline struct cred *nsset_cred(struct nsset *set) int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); +int exec_task_namespaces(void); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 050d7d0cd81b..d6be2a686100 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -797,6 +797,7 @@ enum nvme_opcode { nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_append = 0x7d, + nvme_cmd_vendor_start = 0x80, }; #define nvme_opcode_name(opcode) { opcode, #opcode } @@ -963,6 +964,7 @@ enum { NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_DTYPE_STREAMS = 1 << 4, + NVME_WZ_DEAC = 1 << 9, }; struct nvme_dsm_cmd { diff --git a/include/linux/of.h b/include/linux/of.h index 6b79ef9a6541..8b9f94386dc3 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1549,9 +1549,9 @@ enum of_overlay_notify_action { OF_OVERLAY_POST_REMOVE, }; -static inline char *of_overlay_action_name(enum of_overlay_notify_action action) +static 
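/* Usage sketch for the formatted-extack helpers from the netlink.h hunk
 * above; the names and values here are hypothetical:
 *
 *	if (val > max) {
 *		NL_SET_ERR_MSG_FMT(extack, "value %u exceeds maximum %u",
 *				   val, max);
 *		return -EINVAL;
 *	}
 *
 * NL_SET_ERR_MSG_FMT_MOD() does the same with a KBUILD_MODNAME prefix.
 * Messages that do not fit in NETLINK_MAX_FMTMSG_LEN are truncated and
 * a ratelimited warning is logged.
 */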
inline const char *of_overlay_action_name(enum of_overlay_notify_action action) { - static char *of_overlay_action_name[] = { + static const char *const of_overlay_action_name[] = { "init", "pre-apply", "post-apply", diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 45598dbec269..265f26eeaf6b 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -154,4 +154,15 @@ static inline const __be32 *of_get_pci_address(struct device_node *dev, int bar_ return __of_get_address(dev, -1, bar_no, size, flags); } +static inline int of_address_count(struct device_node *np) +{ + struct resource res; + int count = 0; + + while (of_address_to_resource(np, count, &res) == 0) + count++; + + return count; +} + #endif /* __OF_ADDRESS_H */ diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index a5166eb93437..6db627257a7b 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -34,7 +34,7 @@ enum of_gpio_flags { #ifdef CONFIG_OF_GPIO -#include <linux/kernel.h> +#include <linux/container_of.h> /* * OF GPIO chip for memory mapped banks diff --git a/include/linux/of_net.h b/include/linux/of_net.h index 0484b613ca64..d88715a0b3a5 100644 --- a/include/linux/of_net.h +++ b/include/linux/of_net.h @@ -14,6 +14,7 @@ struct net_device; extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface); extern int of_get_mac_address(struct device_node *np, u8 *mac); +extern int of_get_mac_address_nvmem(struct device_node *np, u8 *mac); int of_get_ethdev_address(struct device_node *np, struct net_device *dev); extern struct net_device *of_find_net_device_by_node(struct device_node *np); #else @@ -28,6 +29,11 @@ static inline int of_get_mac_address(struct device_node *np, u8 *mac) return -ENODEV; } +static inline int of_get_mac_address_nvmem(struct device_node *np, u8 *mac) +{ + return -ENODEV; +} + static inline int of_get_ethdev_address(struct device_node *np, struct net_device *dev) { return -ENODEV; diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 1d3be1a2204c..0e33b5cbdb9f 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -128,6 +128,53 @@ static inline bool __must_check __must_check_overflow(bool overflow) (*_d >> _to_shift) != _a); \ })) +#define __overflows_type_constexpr(x, T) ( \ + is_unsigned_type(typeof(x)) ? \ + (x) > type_max(typeof(T)) : \ + is_unsigned_type(typeof(T)) ? \ + (x) < 0 || (x) > type_max(typeof(T)) : \ + (x) < type_min(typeof(T)) || (x) > type_max(typeof(T))) + +#define __overflows_type(x, T) ({ \ + typeof(T) v = 0; \ + check_add_overflow((x), v, &v); \ +}) + +/** + * overflows_type - helper for checking the overflows between value, variables, + * or data type + * + * @n: source constant value or variable to be checked + * @T: destination variable or data type proposed to store @n + * + * Compares the @n expression for whether or not it can safely fit in + * the storage of the type in @T. @n and @T can have different types. + * If @n is a constant expression, this will also resolve to a constant + * expression. + * + * Returns: true if overflow can occur, false otherwise. + */ +#define overflows_type(n, T) \ + __builtin_choose_expr(__is_constexpr(n), \ + __overflows_type_constexpr(n, T), \ + __overflows_type(n, T)) + +/** + * castable_to_type - like __same_type(), but also allows for casted literals + * + * @n: variable or constant value + * @T: variable or data type + * + * Unlike the __same_type() macro, this allows a constant value as the + * first argument.
If this value would not overflow into an assignment + * of the second argument's type, it returns true. Otherwise, this falls + * back to __same_type(). + */ +#define castable_to_type(n, T) \ + __builtin_choose_expr(__is_constexpr(n), \ + !__overflows_type_constexpr(n, T), \ + __same_type(n, T)) + /** * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX * @factor1: first factor diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c50ce2812f17..69e93a0c1277 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -177,9 +177,6 @@ enum pageflags { /* SLOB */ PG_slob_free = PG_private, - /* Compound pages. Stored in first tail page's flags */ - PG_double_map = PG_workingset, - #ifdef CONFIG_MEMORY_FAILURE /* * Compound pages. Stored in first tail page's flags. @@ -642,7 +639,7 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) * Different with flags above, this flag is used only for fsdax mode. It * indicates that this page->mapping is now under reflink case. */ -#define PAGE_MAPPING_DAX_COW 0x1 +#define PAGE_MAPPING_DAX_SHARED ((void *)0x1) static __always_inline bool folio_mapping_flags(struct folio *folio) { @@ -875,29 +872,11 @@ static inline int PageTransTail(struct page *page) { return PageTail(page); } - -/* - * PageDoubleMap indicates that the compound page is mapped with PTEs as well - * as PMDs. - * - * This is required for optimization of rmap operations for THP: we can postpone - * per small page mapcount accounting (and its overhead from atomic operations) - * until the first PMD split. - * - * For the page PageDoubleMap means ->_mapcount in all sub-pages is offset up - * by one. This reference will go away with last compound_mapcount. - * - * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap(). 
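(Stepping back to the overflow.h helpers added above, a few illustrative evaluations; the values are arbitrary:

	overflows_type(128, s8);	/* true: 128 > S8_MAX */
	overflows_type(127, s8);	/* false: fits exactly */
	castable_to_type(250, u8);	/* true: constant fits in u8 */
	castable_to_type(-1, u8);	/* false: negative constant */

Each resolves to a constant expression because the first argument is one.)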
- */ -PAGEFLAG(DoubleMap, double_map, PF_SECOND) - TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else TESTPAGEFLAG_FALSE(TransHuge, transhuge) TESTPAGEFLAG_FALSE(TransCompound, transcompound) TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) TESTPAGEFLAG_FALSE(TransTail, transtail) -PAGEFLAG_FALSE(DoubleMap, double_map) - TESTSCFLAG_FALSE(DoubleMap, double_map) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bbccb4044222..29e1f9e76eb6 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -504,9 +504,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_NOFS 0x00000010 #define FGP_NOWAIT 0x00000020 #define FGP_FOR_MMAP 0x00000040 -#define FGP_HEAD 0x00000080 -#define FGP_ENTRY 0x00000100 -#define FGP_STABLE 0x00000200 +#define FGP_ENTRY 0x00000080 +#define FGP_STABLE 0x00000100 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, int fgp_flags, gfp_t gfp); @@ -1102,12 +1101,10 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp); void filemap_remove_folio(struct folio *folio); -void delete_from_page_cache(struct page *page); void __filemap_remove_folio(struct folio *folio, void *shadow); -void replace_page_cache_page(struct page *old, struct page *new); +void replace_page_cache_folio(struct folio *old, struct folio *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); -int try_to_release_page(struct page *page, gfp_t gfp); bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index f3fafb731ffd..959f52e5867d 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -27,6 +27,8 @@ struct mm_walk; * "do page table walk over the current vma", returning * a negative value means "abort current page table walk * right now" and returning 1 means "skip the current vma" + * Note that this callback is not called when the caller + * passes in a single VMA as for walk_page_vma(). * @pre_vma: if set, called before starting walk on a non-null vma. * @post_vma: if set, called after a walk on a non-null vma, provided * that @pre_vma and the vma walk succeeded. @@ -99,6 +101,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start, unsigned long end, const struct mm_walk_ops *ops, pgd_t *pgd, void *private); +int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start, + unsigned long end, const struct mm_walk_ops *ops, + void *private); int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, void *private); int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, diff --git a/include/linux/panic.h b/include/linux/panic.h index c7759b3f2045..979b776e3bcb 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -11,6 +11,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) 
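/* Sketch of the new walk_page_range_vma() from the pagewalk.h hunk above,
 * counting present PTEs in a single VMA; the callback and caller names
 * are hypothetical, and the mmap lock must be held as usual:
 *
 *	static int foo_pte(pte_t *pte, unsigned long addr,
 *			   unsigned long next, struct mm_walk *walk)
 *	{
 *		unsigned long *count = walk->private;
 *
 *		if (pte_present(*pte))
 *			(*count)++;
 *		return 0;
 *	}
 *
 *	static const struct mm_walk_ops foo_ops = { .pte_entry = foo_pte };
 *
 *	mmap_read_lock(vma->vm_mm);
 *	walk_page_range_vma(vma, vma->vm_start, vma->vm_end, &foo_ops,
 *			    &count);
 *	mmap_read_unlock(vma->vm_mm);
 */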
__noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); +void check_panic_on_warn(const char *origin); extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); diff --git a/include/linux/pci.h b/include/linux/pci.h index 2bda4a4e47e8..adffd65e84b4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -38,6 +38,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/resource_ext.h> +#include <linux/msi_api.h> #include <uapi/linux/pci.h> #include <linux/pci_ids.h> @@ -409,6 +410,7 @@ struct pci_dev { */ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + struct resource driver_exclusive_resource; /* driver exclusive resource ranges */ bool match_driver; /* Skip attaching driver */ @@ -843,6 +845,9 @@ struct pci_error_handlers { /* Device driver may resume normal operations */ void (*resume)(struct pci_dev *dev); + + /* Allow device driver to record more details of a correctable error */ + void (*cor_error_detected)(struct pci_dev *dev); }; @@ -1407,6 +1412,21 @@ int pci_request_selected_regions(struct pci_dev *, int, const char *); int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); +static inline __must_check struct resource * +pci_request_config_region_exclusive(struct pci_dev *pdev, unsigned int offset, + unsigned int len, const char *name) +{ + return __request_region(&pdev->driver_exclusive_resource, offset, len, + name, IORESOURCE_EXCLUSIVE); +} + +static inline void pci_release_config_region(struct pci_dev *pdev, + unsigned int offset, + unsigned int len) +{ + __release_region(&pdev->driver_exclusive_resource, offset, len); +} + /* drivers/pci/bus.c */ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, @@ -1553,10 +1573,17 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, return rc; return 0; } +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags); int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, struct irq_affinity *affd); +bool pci_msix_can_alloc_dyn(struct pci_dev *dev); +struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, + const struct irq_affinity_desc *affdesc); +void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map); + void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); @@ -1586,6 +1613,13 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, return 1; return -ENOSPC; } +static inline int +pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags) +{ + return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, + flags, NULL); +} static inline void pci_free_irq_vectors(struct pci_dev *dev) { @@ -1726,6 +1760,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus) { return 0; } #endif int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent); #endif /* Some architectures require additional setup to direct VGA traffic */ @@ -1898,15 +1933,13 @@ 
pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, { return -ENOSPC; } -#endif /* CONFIG_PCI */ - static inline int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags) { - return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags, - NULL); + return -ENOSPC; } +#endif /* CONFIG_PCI */ /* Include architecture-dependent settings and functions */ @@ -2474,6 +2507,14 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif +struct msi_domain_template; + +bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, + unsigned int hwsize, void *data); +struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc); +void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); + #include <linux/dma-mapping.h> #define pci_printk(level, pdev, fmt, arg...) \ @@ -2484,6 +2525,7 @@ void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg) #define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg) #define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg) +#define pci_warn_once(pdev, fmt, arg...) dev_warn_once(&(pdev)->dev, fmt, ##arg) #define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg) #define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg) #define pci_dbg(pdev, fmt, arg...) dev_dbg(&(pdev)->dev, fmt, ##arg) diff --git a/include/linux/pe.h b/include/linux/pe.h index 1d3836ef9d92..6ffabf1e6d03 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -29,7 +29,14 @@ * handover_offset and xloadflags fields in the bootparams structure. */ #define LINUX_EFISTUB_MAJOR_VERSION 0x1 -#define LINUX_EFISTUB_MINOR_VERSION 0x0 +#define LINUX_EFISTUB_MINOR_VERSION 0x1 + +/* + * LINUX_PE_MAGIC appears at offset 0x38 into the MS-DOS header of EFI bootable + * Linux kernel images that target the architecture as specified by the PE/COFF + * header machine type field. + */ +#define LINUX_PE_MAGIC 0x818223cd #define MZ_MAGIC 0x5a4d /* "MZ" */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index f1ec5ad1351c..1338ea2aa720 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -37,12 +37,11 @@ /* * Percpu allocator can serve percpu allocations before slab is * initialized which allows slab to depend on the percpu allocator. - * The following two parameters decide how much resource to - * preallocate for this. Keep PERCPU_DYNAMIC_RESERVE equal to or - * larger than PERCPU_DYNAMIC_EARLY_SIZE. + * The following parameter decides how much resource to preallocate + * for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than + * PERCPU_DYNAMIC_EARLY_SIZE.
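(Returning to the pci.h hunks above: a sketch of the dynamic MSI-X flow; the device, handler and names are hypothetical:

	int nvec = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_MSIX);

	if (nvec > 0 && pci_msix_can_alloc_dyn(pdev)) {
		struct msi_map map = pci_msix_alloc_irq_at(pdev, MSI_ANY_INDEX,
							   NULL);

		if (map.index >= 0 &&
		    request_irq(map.virq, foo_handler, 0, "foo", foo))
			pci_msix_free_irq(pdev, map);
	}

MSI_ANY_INDEX, assuming it is provided by msi_api.h here, lets the core pick a free index; map.index is negative on allocation failure.)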
*/ -#define PERCPU_DYNAMIC_EARLY_SLOTS 128 -#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10) +#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 8ed5fba6d156..a3aae8d57a42 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -13,7 +13,6 @@ #include <linux/threads.h> #include <linux/percpu.h> #include <linux/types.h> -#include <linux/gfp.h> /* percpu_counter batch for local add or sub */ #define PERCPU_COUNTER_LOCAL_BATCH INT_MAX @@ -46,6 +45,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 percpu_counter_sum_all(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); @@ -194,6 +194,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } +static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc) +{ + return percpu_counter_read(fbc); +} + static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 0356cb6a215d..ef914a600087 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -100,7 +100,7 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); - int (*filter_match)(struct perf_event *event); + bool (*filter)(struct pmu *pmu, int cpu); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 @@ -174,7 +174,6 @@ void kvm_host_pmu_init(struct arm_pmu *pmu); /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); -struct arm_pmu *armpmu_alloc_atomic(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); int armpmu_request_irq(int irq, int cpu); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0031f7b4d9ab..c6a3bac76966 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -266,6 +266,7 @@ struct hw_perf_event { }; struct perf_event; +struct perf_event_pmu_context; /* * Common implementation detail of pmu::{start,commit,cancel}_txn @@ -308,7 +309,7 @@ struct pmu { int capabilities; int __percpu *pmu_disable_count; - struct perf_cpu_context __percpu *pmu_cpu_context; + struct perf_cpu_pmu_context __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ int task_ctx_nr; int hrtimer_interval_ms; @@ -443,7 +444,7 @@ struct pmu { /* * context-switches callback */ - void (*sched_task) (struct perf_event_context *ctx, + void (*sched_task) (struct perf_event_pmu_context *pmu_ctx, bool sched_in); /* @@ -457,8 +458,8 @@ struct pmu { * implementation and Perf core context switch handling callbacks for usage * examples. */ - void (*swap_task_ctx) (struct perf_event_context *prev, - struct perf_event_context *next); + void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc, + struct perf_event_pmu_context *next_epc); /* optional */ /* @@ -522,9 +523,10 @@ struct pmu { /* optional */ /* - * Filter events for PMU-specific reasons. + * Skip programming this PMU on the given CPU. Typically needed for + * big.LITTLE things. 
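For instance, an arm_pmu-style driver would implement it roughly as (the names are hypothetical):

	static bool foo_pmu_filter(struct pmu *pmu, int cpu)
	{
		struct foo_pmu *fp = container_of(pmu, struct foo_pmu, pmu);

		/* true means: skip programming this PMU on @cpu */
		return !cpumask_test_cpu(cpu, &fp->supported_cpus);
	}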
*/ - int (*filter_match) (struct perf_event *event); /* optional */ + bool (*filter) (struct pmu *pmu, int cpu); /* optional */ /* * Check period value for PERF_EVENT_IOC_PERIOD ioctl. @@ -695,6 +697,11 @@ struct perf_event { int group_caps; struct perf_event *group_leader; + /* + * event->pmu always points to the pmu to which this event belongs. + * In contrast, event->pmu_ctx->pmu may point to a different pmu when + * a group of events from different PMUs is created. + */ struct pmu *pmu; void *pmu_private; @@ -720,6 +727,12 @@ struct perf_event { struct hw_perf_event hw; struct perf_event_context *ctx; + /* + * event->pmu_ctx points to the perf_event_pmu_context to which the + * event is added. For a software event this pmu_ctx can belong to + * another pmu when the event is part of a group which also contains + * non-software events. + */ + struct perf_event_pmu_context *pmu_ctx; atomic_long_t refcount; /* @@ -812,19 +825,69 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; +/* + * ,-----------------------[1:n]----------------------. + * V V + * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event + * ^ ^ | | + * `--------[1:n]---------' `-[n:1]-> pmu <-[1:n]-' + * + * + * struct perf_event_pmu_context lifetime is refcount based and RCU freed + * (similar to perf_event_context). Locking is as if it were a member of + * perf_event_context; specifically: + * + * modification, both: ctx->mutex && ctx->lock + * reading, either: ctx->mutex || ctx->lock + * + * There is one exception to this; namely put_pmu_ctx() isn't always called + * with ctx->mutex held; this means that as long as we can guarantee the epc + * has events the above rules hold. + * + * Specifically, sys_perf_event_open()'s group_leader case depends on + * ctx->mutex pinning the configuration. Since we hold a reference on + * group_leader (through the filedesc) it can't go away, therefore its + * associated pmu_ctx must exist and cannot change due to ctx->mutex. + */ +struct perf_event_pmu_context { + struct pmu *pmu; + struct perf_event_context *ctx; + + struct list_head pmu_ctx_entry; + + struct list_head pinned_active; + struct list_head flexible_active; + + /* Used to avoid freeing per-cpu perf_event_pmu_context */ + unsigned int embedded : 1; + + unsigned int nr_events; + + atomic_t refcount; /* event <-> epc */ + struct rcu_head rcu_head; + + void *task_ctx_data; /* pmu specific data */ + /* + * Set when one or more (plausibly active) events can't be scheduled + * due to pmu overcommit or pmu constraints, except tolerant to + * events not necessary to be active due to scheduling constraints, + * such as cgroups.
+ */ + int rotate_necessary; +}; struct perf_event_groups { struct rb_root tree; u64 index; }; + /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { - struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: @@ -837,27 +900,21 @@ struct perf_event_context { */ struct mutex mutex; - struct list_head active_ctx_list; + struct list_head pmu_ctx_list; struct perf_event_groups pinned_groups; struct perf_event_groups flexible_groups; struct list_head event_list; - struct list_head pinned_active; - struct list_head flexible_active; - int nr_events; - int nr_active; int nr_user; int is_active; + + int nr_task_data; int nr_stat; int nr_freq; int rotate_disable; - /* - * Set when nr_events != nr_active, except tolerant to events not - * necessary to be active due to scheduling constraints, such as cgroups. - */ - int rotate_necessary; - refcount_t refcount; + + refcount_t refcount; /* event <-> ctx */ struct task_struct *task; /* @@ -878,7 +935,6 @@ struct perf_event_context { #ifdef CONFIG_CGROUP_PERF int nr_cgroups; /* cgroup evts */ #endif - void *task_ctx_data; /* pmu specific data */ struct rcu_head rcu_head; /* @@ -896,12 +952,13 @@ struct perf_event_context { */ #define PERF_NR_CONTEXTS 4 -/** - * struct perf_cpu_context - per cpu event context structure - */ -struct perf_cpu_context { - struct perf_event_context ctx; - struct perf_event_context *task_ctx; +struct perf_cpu_pmu_context { + struct perf_event_pmu_context epc; + struct perf_event_pmu_context *task_epc; + + struct list_head sched_cb_entry; + int sched_cb_usage; + int active_oncpu; int exclusive; @@ -909,16 +966,20 @@ struct perf_cpu_context { struct hrtimer hrtimer; ktime_t hrtimer_interval; unsigned int hrtimer_active; +}; + +/** + * struct perf_event_cpu_context - per cpu event context structure + */ +struct perf_cpu_context { + struct perf_event_context ctx; + struct perf_event_context *task_ctx; + int online; #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; - struct list_head cgrp_cpuctx_entry; #endif - struct list_head sched_cb_entry; - int sched_cb_usage; - - int online; /* * Per-CPU storage for iterators used in visit_groups_merge. The default * storage is of size 2 to hold the CPU and any CPU event iterators. @@ -982,6 +1043,8 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) #ifdef CONFIG_PERF_EVENTS +extern struct perf_event_context *perf_cpu_task_ctx(void); + extern void *perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event); extern void perf_aux_output_end(struct perf_output_handle *handle, @@ -1187,7 +1250,7 @@ static inline int is_software_event(struct perf_event *event) */ static inline int in_software_context(struct perf_event *event) { - return event->ctx->pmu->task_ctx_nr == perf_sw_context; + return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context; } static inline int is_exclusive_pmu(struct pmu *pmu) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..dfabd549d2e7 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? 
NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. @@ -407,9 +425,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, int full) { - pte_t pte; - pte = ptep_get_and_clear(mm, address, ptep); - return pte; + return ptep_get_and_clear(mm, address, ptep); } #endif @@ -485,30 +501,6 @@ static inline pte_t pte_sw_mkyoung(pte_t pte) #define pte_sw_mkyoung pte_sw_mkyoung #endif -#ifndef pte_savedwrite -#define pte_savedwrite pte_write -#endif - -#ifndef pte_mk_savedwrite -#define pte_mk_savedwrite pte_mkwrite -#endif - -#ifndef pte_clear_savedwrite -#define pte_clear_savedwrite pte_wrprotect -#endif - -#ifndef pmd_savedwrite -#define pmd_savedwrite pmd_write -#endif - -#ifndef pmd_mk_savedwrite -#define pmd_mk_savedwrite pmd_mkwrite -#endif - -#ifndef pmd_clear_savedwrite -#define pmd_clear_savedwrite pmd_wrprotect -#endif - #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pmdp_set_wrprotect(struct mm_struct *mm, diff --git a/include/linux/phy.h b/include/linux/phy.h index ddf66198f751..71eeb4e3b1fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -529,6 +529,8 @@ struct macsec_ops; * * @mdio: MDIO bus this PHY is on * @drv: Pointer to the driver for this PHY instance + * @devlink: Create a link between phy dev and mac dev, if the external phy + * used by current mac interface is managed by another mac interface. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. 
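(On the pgtable.h hunk above: a page-aging walker would pair the two new helpers roughly as follows, with the marking helper hypothetical:

	if (arch_has_hw_nonleaf_pmd_young() && pmd_young(pmdval))
		foo_mark_range_young(addr, next);

Where the architecture provides neither CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG nor its own pmd_young(), the fallback returns 0 and the branch compiles away.)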
@@ -600,6 +602,7 @@ struct macsec_ops; * @psec: Pointer to Power Sourcing Equipment control struct * @lock: Mutex for serialization access to PHY * @state_queue: Work queue for state machine + * @link_down_events: Number of times link was lost * @shared: Pointer to private data shared by phys in one package * @priv: Pointer to driver private data * @@ -617,6 +620,8 @@ struct phy_device { /* And management functions */ struct phy_driver *drv; + struct device_link *devlink; + u32 phy_id; struct phy_c45_device_ids c45_ids; @@ -723,6 +728,8 @@ struct phy_device { int pma_extable; + unsigned int link_down_events; + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); diff --git a/include/linux/phy/phy-mipi-dphy.h b/include/linux/phy/phy-mipi-dphy.h index a877ffee845d..1ac128d78dfe 100644 --- a/include/linux/phy/phy-mipi-dphy.h +++ b/include/linux/phy/phy-mipi-dphy.h @@ -279,6 +279,9 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock, unsigned int bpp, unsigned int lanes, struct phy_configure_opts_mipi_dphy *cfg); +int phy_mipi_dphy_get_default_config_for_hsclk(unsigned long long hs_clk_rate, + unsigned int lanes, + struct phy_configure_opts_mipi_dphy *cfg); int phy_mipi_dphy_config_validate(struct phy_configure_opts_mipi_dphy *cfg); #endif /* __PHY_MIPI_DPHY_H_ */ diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 3f01ac8017e0..c492c26202b5 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -207,6 +207,11 @@ struct phylink_mac_ops { * * If the @state->interface mode is not supported, then the @supported * mask must be cleared. + * + * This member is optional; if not set, the generic validator will be + * used making use of @config->mac_capabilities and + * @config->supported_interfaces to determine which link modes are + * supported. */ void validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); @@ -558,6 +563,9 @@ void phylink_caps_to_linkmodes(unsigned long *linkmodes, unsigned long caps); unsigned long phylink_get_capabilities(phy_interface_t interface, unsigned long mac_capabilities, int rate_matching); +void phylink_validate_mask_caps(unsigned long *supported, + struct phylink_link_state *state, + unsigned long caps); void phylink_generic_validate(struct phylink_config *config, unsigned long *supported, struct phylink_link_state *state); @@ -613,6 +621,30 @@ int phylink_speed_up(struct phylink *pl); void phylink_set_port_modes(unsigned long *bits); +/** + * phylink_get_link_timer_ns - return the PCS link timer value + * @interface: link &typedef phy_interface_t mode + * + * Return the PCS link timer setting in nanoseconds for the PHY @interface + * mode, or -EINVAL if not appropriate. 
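For example, a PCS whose link timer counts in ticks of 8 ns (a hypothetical clock) could program it as:

	int ns = phylink_get_link_timer_ns(interface);

	if (ns > 0)
		foo_pcs_write(pcs, FOO_LINK_TIMER, ns / 8);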
+ */ +static inline int phylink_get_link_timer_ns(phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_USXGMII: + return 1600000; + + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + return 10000000; + + default: + return -EINVAL; + } +} + void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 019fecd75d0c..4729d54e8995 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -12,14 +12,15 @@ #define __LINUX_PINCTRL_CONSUMER_H #include <linux/err.h> -#include <linux/list.h> -#include <linux/seq_file.h> +#include <linux/types.h> + #include <linux/pinctrl/pinctrl-state.h> +struct device; + /* This struct is private to the core and should be regarded as a cookie */ struct pinctrl; struct pinctrl_state; -struct device; #ifdef CONFIG_PINCTRL @@ -33,9 +34,8 @@ extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); extern struct pinctrl * __must_check pinctrl_get(struct device *dev); extern void pinctrl_put(struct pinctrl *p); -extern struct pinctrl_state * __must_check pinctrl_lookup_state( - struct pinctrl *p, - const char *name); +extern struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, + const char *name); extern int pinctrl_select_state(struct pinctrl *p, struct pinctrl_state *s); extern struct pinctrl * __must_check devm_pinctrl_get(struct device *dev); @@ -101,9 +101,8 @@ static inline void pinctrl_put(struct pinctrl *p) { } -static inline struct pinctrl_state * __must_check pinctrl_lookup_state( - struct pinctrl *p, - const char *name) +static inline struct pinctrl_state * __must_check pinctrl_lookup_state(struct pinctrl *p, + const char *name) { return NULL; } @@ -145,8 +144,8 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev) #endif /* CONFIG_PINCTRL */ -static inline struct pinctrl * __must_check pinctrl_get_select( - struct device *dev, const char *name) +static inline struct pinctrl * __must_check pinctrl_get_select(struct device *dev, + const char *name) { struct pinctrl *p; struct pinctrl_state *s; @@ -171,14 +170,13 @@ static inline struct pinctrl * __must_check pinctrl_get_select( return p; } -static inline struct pinctrl * __must_check pinctrl_get_select_default( - struct device *dev) +static inline struct pinctrl * __must_check pinctrl_get_select_default(struct device *dev) { return pinctrl_get_select(dev, PINCTRL_STATE_DEFAULT); } -static inline struct pinctrl * __must_check devm_pinctrl_get_select( - struct device *dev, const char *name) +static inline struct pinctrl * __must_check devm_pinctrl_get_select(struct device *dev, + const char *name) { struct pinctrl *p; struct pinctrl_state *s; @@ -203,8 +201,7 @@ static inline struct pinctrl * __must_check devm_pinctrl_get_select( return p; } -static inline struct pinctrl * __must_check devm_pinctrl_get_select_default( - struct device *dev) +static inline struct pinctrl * __must_check devm_pinctrl_get_select_default(struct device *dev) { return devm_pinctrl_get_select(dev, PINCTRL_STATE_DEFAULT); } diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h index a48ff69acddd..9e8b559e1253 100644 --- a/include/linux/pinctrl/devinfo.h +++ b/include/linux/pinctrl/devinfo.h @@ -14,11 +14,15 @@ #ifndef PINCTRL_DEVINFO_H 
#define PINCTRL_DEVINFO_H +struct device; + #ifdef CONFIG_PINCTRL /* The device core acts as a consumer toward pinctrl */ #include <linux/pinctrl/consumer.h> +struct pinctrl; + /** * struct dev_pin_info - pin state container for devices * @p: pinctrl handle for the containing device @@ -42,8 +46,6 @@ extern int pinctrl_init_done(struct device *dev); #else -struct device; - /* Stubs if we're not using pinctrl */ static inline int pinctrl_bind_pins(struct device *dev) diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index e987dc9fd2af..0639b36f43c5 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -11,7 +11,7 @@ #ifndef __LINUX_PINCTRL_MACHINE_H #define __LINUX_PINCTRL_MACHINE_H -#include <linux/bug.h> +#include <linux/kernel.h> /* ARRAY_SIZE() */ #include <linux/pinctrl/pinctrl-state.h> @@ -149,16 +149,18 @@ struct pinctrl_map { #define PIN_MAP_CONFIGS_GROUP_HOG_DEFAULT(dev, grp, cfgs) \ PIN_MAP_CONFIGS_GROUP(dev, PINCTRL_STATE_DEFAULT, dev, grp, cfgs) +struct pinctrl_map; + #ifdef CONFIG_PINCTRL extern int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned num_maps); + unsigned num_maps); extern void pinctrl_unregister_mappings(const struct pinctrl_map *map); extern void pinctrl_provide_dummies(void); #else static inline int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned num_maps) + unsigned num_maps) { return 0; } diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 2422211d6a5a..d74b7a4ea154 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -11,9 +11,12 @@ #ifndef __LINUX_PINCTRL_PINCONF_GENERIC_H #define __LINUX_PINCTRL_PINCONF_GENERIC_H -#include <linux/device.h> +#include <linux/types.h> + #include <linux/pinctrl/machine.h> +struct device_node; + struct pinctrl_dev; struct pinctrl_map; @@ -35,7 +38,8 @@ struct pinctrl_map; * impedance. * @PIN_CONFIG_BIAS_PULL_DOWN: the pin will be pulled down (usually with high * impedance to GROUND). If the argument is != 0 pull-down is enabled, - * if it is 0, pull-down is total, i.e. the pin is connected to GROUND. + * the value is interpreted by the driver and can be custom or an SI unit + * such as Ohms. * @PIN_CONFIG_BIAS_PULL_PIN_DEFAULT: the pin will be pulled up or down based * on embedded knowledge of the controller hardware, like current mux * function. The pull direction and possibly strength too will normally @@ -46,7 +50,8 @@ struct pinctrl_map; * @PIN_CONFIG_BIAS_DISABLE. * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high * impedance to VDD). If the argument is != 0 pull-up is enabled, - * if it is 0, pull-up is total, i.e. the pin is connected to VDD. + * the value is interpreted by the driver and can be custom or an SI unit + * such as Ohms. * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open * collector) which means it is usually wired with other output ports * which are then pulled up with an external resistor. 
Setting this @@ -196,25 +201,25 @@ int pinconf_generic_dt_node_to_map(struct pinctrl_dev *pctldev, void pinconf_generic_dt_free_map(struct pinctrl_dev *pctldev, struct pinctrl_map *map, unsigned num_maps); -static inline int pinconf_generic_dt_node_to_map_group( - struct pinctrl_dev *pctldev, struct device_node *np_config, - struct pinctrl_map **map, unsigned *num_maps) +static inline int pinconf_generic_dt_node_to_map_group(struct pinctrl_dev *pctldev, + struct device_node *np_config, struct pinctrl_map **map, + unsigned *num_maps) { return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, PIN_MAP_TYPE_CONFIGS_GROUP); } -static inline int pinconf_generic_dt_node_to_map_pin( - struct pinctrl_dev *pctldev, struct device_node *np_config, - struct pinctrl_map **map, unsigned *num_maps) +static inline int pinconf_generic_dt_node_to_map_pin(struct pinctrl_dev *pctldev, + struct device_node *np_config, struct pinctrl_map **map, + unsigned *num_maps) { return pinconf_generic_dt_node_to_map(pctldev, np_config, map, num_maps, PIN_MAP_TYPE_CONFIGS_PIN); } -static inline int pinconf_generic_dt_node_to_map_all( - struct pinctrl_dev *pctldev, struct device_node *np_config, - struct pinctrl_map **map, unsigned *num_maps) +static inline int pinconf_generic_dt_node_to_map_all(struct pinctrl_dev *pctldev, + struct device_node *np_config, struct pinctrl_map **map, + unsigned *num_maps) { /* * passing the type as PIN_MAP_TYPE_INVALID causes the underlying parser diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 487117ccb1bc..a0d39b303431 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -11,20 +11,20 @@ #ifndef __LINUX_PINCTRL_PINCTRL_H #define __LINUX_PINCTRL_PINCTRL_H -#include <linux/radix-tree.h> -#include <linux/list.h> -#include <linux/seq_file.h> -#include <linux/pinctrl/pinctrl-state.h> -#include <linux/pinctrl/devinfo.h> +#include <linux/types.h> struct device; +struct device_node; +struct gpio_chip; +struct module; +struct seq_file; + +struct pin_config_item; +struct pinconf_generic_params; +struct pinconf_ops; struct pinctrl_dev; struct pinctrl_map; struct pinmux_ops; -struct pinconf_ops; -struct pin_config_item; -struct gpio_chip; -struct device_node; /** * struct pingroup - provides information on pingroup @@ -40,7 +40,7 @@ struct pingroup { /* Convenience macro to define a single named or anonymous pingroup */ #define PINCTRL_PINGROUP(_name, _pins, _npins) \ -(struct pingroup){ \ +(struct pingroup) { \ .name = _name, \ .pins = _pins, \ .npins = _npins, \ diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index 9a647fa5c8f1..a7e370965c53 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -11,11 +11,10 @@ #ifndef __LINUX_PINCTRL_PINMUX_H #define __LINUX_PINCTRL_PINMUX_H -#include <linux/list.h> -#include <linux/seq_file.h> -#include <linux/pinctrl/pinctrl.h> +#include <linux/types.h> struct pinctrl_dev; +struct pinctrl_gpio_range; /** * struct pinmux_ops - pinmux operations, to be implemented by pin controller diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h deleted file mode 100644 index f9c5ac80d59b..000000000000 --- a/include/linux/pktcdvd.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (C) 2000 Jens Axboe <axboe@suse.de> - * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com> - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. 
- * - * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and - * DVD-RW devices. - * - */ -#ifndef __PKTCDVD_H -#define __PKTCDVD_H - -#include <linux/blkdev.h> -#include <linux/completion.h> -#include <linux/cdrom.h> -#include <linux/kobject.h> -#include <linux/sysfs.h> -#include <linux/mempool.h> -#include <uapi/linux/pktcdvd.h> - -/* default bio write queue congestion marks */ -#define PKT_WRITE_CONGESTION_ON 10000 -#define PKT_WRITE_CONGESTION_OFF 9000 - - -struct packet_settings -{ - __u32 size; /* packet size in (512 byte) sectors */ - __u8 fp; /* fixed packets */ - __u8 link_loss; /* the rest is specified - * as per Mt Fuji */ - __u8 write_type; - __u8 track_mode; - __u8 block_mode; -}; - -/* - * Very crude stats for now - */ -struct packet_stats -{ - unsigned long pkt_started; - unsigned long pkt_ended; - unsigned long secs_w; - unsigned long secs_rg; - unsigned long secs_r; -}; - -struct packet_cdrw -{ - struct list_head pkt_free_list; - struct list_head pkt_active_list; - spinlock_t active_list_lock; /* Serialize access to pkt_active_list */ - struct task_struct *thread; - atomic_t pending_bios; -}; - -/* - * Switch to high speed reading after reading this many kilobytes - * with no interspersed writes. - */ -#define HI_SPEED_SWITCH 512 - -struct packet_iosched -{ - atomic_t attention; /* Set to non-zero when queue processing is needed */ - int writing; /* Non-zero when writing, zero when reading */ - spinlock_t lock; /* Protecting read/write queue manipulations */ - struct bio_list read_queue; - struct bio_list write_queue; - sector_t last_write; /* The sector where the last write ended */ - int successive_reads; -}; - -/* - * 32 buffers of 2048 bytes - */ -#if (PAGE_SIZE % CD_FRAMESIZE) != 0 -#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE" -#endif -#define PACKET_MAX_SIZE 128 -#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE) -#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9) - -enum packet_data_state { - PACKET_IDLE_STATE, /* Not used at the moment */ - PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */ - /* we don't have to do as much */ - /* data gathering */ - PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */ - PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */ - PACKET_RECOVERY_STATE, /* Recover after read/write errors */ - PACKET_FINISHED_STATE, /* After write has finished */ - - PACKET_NUM_STATES /* Number of possible states */ -}; - -/* - * Information needed for writing a single packet - */ -struct pktcdvd_device; - -struct packet_data -{ - struct list_head list; - - spinlock_t lock; /* Lock protecting state transitions and */ - /* orig_bios list */ - - struct bio_list orig_bios; /* Original bios passed to pkt_make_request */ - /* that will be handled by this packet */ - int write_size; /* Total size of all bios in the orig_bios */ - /* list, measured in number of frames */ - - struct bio *w_bio; /* The bio we will send to the real CD */ - /* device once we have all data for the */ - /* packet we are going to write */ - sector_t sector; /* First sector in this packet */ - int frames; /* Number of frames in this packet */ - - enum packet_data_state state; /* Current state */ - atomic_t run_sm; /* Incremented whenever the state */ - /* machine needs to be run */ - long sleep_time; /* Set this to non-zero to make the state */ - /* machine run after this many jiffies. 
*/ - - atomic_t io_wait; /* Number of pending IO operations */ - atomic_t io_errors; /* Number of read/write errors during IO */ - - struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */ - struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE]; - - int cache_valid; /* If non-zero, the data for the zone defined */ - /* by the sector variable is completely cached */ - /* in the pages[] vector. */ - - int id; /* ID number for debugging */ - struct pktcdvd_device *pd; -}; - -struct pkt_rb_node { - struct rb_node rb_node; - struct bio *bio; -}; - -struct packet_stacked_data -{ - struct bio *bio; /* Original read request bio */ - struct pktcdvd_device *pd; -}; -#define PSD_POOL_SIZE 64 - -struct pktcdvd_device -{ - struct block_device *bdev; /* dev attached */ - dev_t pkt_dev; /* our dev */ - char name[20]; - struct packet_settings settings; - struct packet_stats stats; - int refcnt; /* Open count */ - int write_speed; /* current write speed, kB/s */ - int read_speed; /* current read speed, kB/s */ - unsigned long offset; /* start offset */ - __u8 mode_offset; /* 0 / 8 */ - __u8 type; - unsigned long flags; - __u16 mmc3_profile; - __u32 nwa; /* next writable address */ - __u32 lra; /* last recorded address */ - struct packet_cdrw cdrw; - wait_queue_head_t wqueue; - - spinlock_t lock; /* Serialize access to bio_queue */ - struct rb_root bio_queue; /* Work queue of bios we need to handle */ - int bio_queue_size; /* Number of nodes in bio_queue */ - bool congested; /* Someone is waiting for bio_queue_size - * to drop. */ - sector_t current_sector; /* Keep track of where the elevator is */ - atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ - /* needs to be run. */ - mempool_t rb_pool; /* mempool for pkt_rb_node allocations */ - - struct packet_iosched iosched; - struct gendisk *disk; - - int write_congestion_off; - int write_congestion_on; - - struct device *dev; /* sysfs pktcdvd[0-7] dev */ - - struct dentry *dfs_d_root; /* debugfs: devname directory */ - struct dentry *dfs_f_info; /* debugfs: info file */ -}; - -#endif /* __PKTCDVD_H */ diff --git a/include/linux/platform_data/gpmc-omap.h b/include/linux/platform_data/gpmc-omap.h index c9cc4e32435d..dcca6c5e23bb 100644 --- a/include/linux/platform_data/gpmc-omap.h +++ b/include/linux/platform_data/gpmc-omap.h @@ -136,6 +136,13 @@ struct gpmc_device_timings { #define GPMC_MUX_AAD 1 /* Addr-Addr-Data multiplex */ #define GPMC_MUX_AD 2 /* Addr-Data multiplex */ +/* Wait pin polarity values */ +#define GPMC_WAITPINPOLARITY_INVALID UINT_MAX +#define GPMC_WAITPINPOLARITY_ACTIVE_LOW 0 +#define GPMC_WAITPINPOLARITY_ACTIVE_HIGH 1 + +#define GPMC_WAITPIN_INVALID UINT_MAX + struct gpmc_settings { bool burst_wrap; /* enables wrap bursting */ bool burst_read; /* enables read page/burst mode */ @@ -149,6 +156,7 @@ struct gpmc_settings { u32 device_width; /* device bus width (8 or 16 bit) */ u32 mux_add_data; /* multiplex address & data */ u32 wait_pin; /* wait-pin to be used */ + u32 wait_pin_polarity; }; /* Data for each chip select */ diff --git a/include/linux/platform_data/gsc_hwmon.h b/include/linux/platform_data/gsc_hwmon.h index 281f499eda97..f2781aa7eff8 100644 --- a/include/linux/platform_data/gsc_hwmon.h +++ b/include/linux/platform_data/gsc_hwmon.h @@ -29,18 +29,17 @@ struct gsc_hwmon_channel { /** * struct gsc_hwmon_platform_data - platform data for gsc_hwmon driver - * @channels: pointer to array of gsc_hwmon_channel structures - * describing channels * @nchannels: number of elements in @channels array * 
@vreference: voltage reference (mV) * @resolution: ADC bit resolution * @fan_base: register base for FAN controller + * @channels: array of gsc_hwmon_channel structures describing channels */ struct gsc_hwmon_platform_data { - const struct gsc_hwmon_channel *channels; int nchannels; unsigned int resolution; unsigned int vreference; unsigned int fan_base; + struct gsc_hwmon_channel channels[]; }; #endif diff --git a/include/linux/platform_data/st33zp24.h b/include/linux/platform_data/st33zp24.h deleted file mode 100644 index 61db674f36cc..000000000000 --- a/include/linux/platform_data/st33zp24.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * STMicroelectronics TPM Linux driver for TPM 1.2 ST33ZP24 - * Copyright (C) 2009 - 2016 STMicroelectronics - */ -#ifndef __ST33ZP24_H__ -#define __ST33ZP24_H__ - -#define TPM_ST33_I2C "st33zp24-i2c" -#define TPM_ST33_SPI "st33zp24-spi" - -struct st33zp24_platform_data { - int io_lpcpd; -}; - -#endif /* __ST33ZP24_H__ */ diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h new file mode 100644 index 000000000000..c852fe24fe2a --- /dev/null +++ b/include/linux/platform_data/x86/pwm-lpss.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Intel Low Power Subsystem PWM controller driver */ + +#ifndef __PLATFORM_DATA_X86_PWM_LPSS_H +#define __PLATFORM_DATA_X86_PWM_LPSS_H + +#include <linux/types.h> + +struct device; + +struct pwm_lpss_chip; + +struct pwm_lpss_boardinfo { + unsigned long clk_rate; + unsigned int npwm; + unsigned long base_unit_bits; + /* + * Some versions of the IP may get stuck in the state machine if the + * enable bit is not set, and hence the update bit will show busy + * status until reset. For other versions it may be otherwise. + */ + bool bypass; + /* + * On some devices the _PS0/_PS3 AML code of the GPU (GFX0) device + * messes with the PWM0 controller's state. + */ + bool other_devices_aml_touches_pwm_regs; +}; + +struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base, + const struct pwm_lpss_boardinfo *info); + +#endif /* __PLATFORM_DATA_X86_PWM_LPSS_H */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ebc351698090..1cd41bdf73cf 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -17,6 +17,7 @@ #include <linux/notifier.h> #include <linux/spinlock.h> #include <linux/cpumask.h> +#include <linux/time64.h> /* * Flags to control the behaviour of a genpd.
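(For the gsc_hwmon flexible-array conversion above, allocation collapses into one struct_size() call; the surrounding names are hypothetical:

	struct gsc_hwmon_platform_data *pdata;

	pdata = devm_kzalloc(dev, struct_size(pdata, channels, nchannels),
			     GFP_KERNEL);
	if (!pdata)
		return ERR_PTR(-ENOMEM);
	pdata->nchannels = nchannels;)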
@@ -95,6 +96,7 @@ struct genpd_governor_data { s64 max_off_time_ns; bool max_off_time_changed; ktime_t next_wakeup; + ktime_t next_hrtimer; bool cached_power_down_ok; bool cached_power_down_state_idx; }; @@ -232,6 +234,7 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_genpd_remove_notifier(struct device *dev); void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next); +ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -293,6 +296,10 @@ static inline int dev_pm_genpd_remove_notifier(struct device *dev) static inline void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next) { } +static inline ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev) +{ + return KTIME_MAX; +} #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 7d1e604c1325..ee608d22ecb9 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -69,21 +69,21 @@ extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *); extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t); extern struct posix_acl *get_posix_acl(struct inode *, int); -extern int set_posix_acl(struct user_namespace *, struct inode *, int, - struct posix_acl *); +int set_posix_acl(struct user_namespace *, struct dentry *, int, + struct posix_acl *); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); struct posix_acl *posix_acl_clone(const struct posix_acl *acl, gfp_t flags); #ifdef CONFIG_FS_POSIX_ACL -int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t); +int posix_acl_chmod(struct user_namespace *, struct dentry *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, struct posix_acl **); int posix_acl_update_mode(struct user_namespace *, struct inode *, umode_t *, struct posix_acl **); -extern int simple_set_acl(struct user_namespace *, struct inode *, - struct posix_acl *, int); +int simple_set_acl(struct user_namespace *, struct dentry *, + struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); struct posix_acl *get_cached_acl(struct inode *inode, int type); @@ -99,9 +99,16 @@ static inline void cache_no_acl(struct inode *inode) inode->i_acl = NULL; inode->i_default_acl = NULL; } + +int vfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name, struct posix_acl *kacl); +struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name); +int vfs_remove_acl(struct user_namespace *mnt_userns, struct dentry *dentry, + const char *acl_name); #else static inline int posix_acl_chmod(struct user_namespace *mnt_userns, - struct inode *inode, umode_t mode) + struct dentry *dentry, umode_t mode) { return 0; } @@ -126,8 +133,28 @@ static inline int posix_acl_create(struct inode *inode, umode_t *mode, static inline void forget_all_cached_acls(struct inode *inode) { } + +static inline int vfs_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *name, + struct posix_acl *acl) +{ + return -EOPNOTSUPP; +} + +static inline struct posix_acl *vfs_get_acl(struct user_namespace *mnt_userns, + struct dentry 
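/* Caller sketch for the dentry-based ACL API above; error handling is
 * abbreviated:
 *
 *	struct posix_acl *acl;
 *
 *	acl = vfs_get_acl(mnt_userns, dentry, XATTR_NAME_POSIX_ACL_ACCESS);
 *	if (IS_ERR(acl))
 *		return PTR_ERR(acl);
 *	...
 *	posix_acl_release(acl);
 */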
*dentry, + const char *acl_name) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline int vfs_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_FS_POSIX_ACL */ -struct posix_acl *get_acl(struct inode *inode, int type); +struct posix_acl *get_inode_acl(struct inode *inode, int type); #endif /* __LINUX_POSIX_ACL_H */ diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 8163dd48c430..54cd7a14330d 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -33,33 +33,40 @@ posix_acl_xattr_count(size_t size) } #ifdef CONFIG_FS_POSIX_ACL -void posix_acl_fix_xattr_from_user(void *value, size_t size); -void posix_acl_fix_xattr_to_user(void *value, size_t size); -void posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, - void *value, size_t size); +struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, + const void *value, size_t size); #else -static inline void posix_acl_fix_xattr_from_user(void *value, size_t size) -{ -} -static inline void posix_acl_fix_xattr_to_user(void *value, size_t size) -{ -} -static inline void -posix_acl_getxattr_idmapped_mnt(struct user_namespace *mnt_userns, - const struct inode *inode, void *value, - size_t size) +static inline struct posix_acl * +posix_acl_from_xattr(struct user_namespace *user_ns, const void *value, + size_t size) { + return ERR_PTR(-EOPNOTSUPP); } #endif -struct posix_acl *posix_acl_from_xattr(struct user_namespace *user_ns, - const void *value, size_t size); int posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, void *buffer, size_t size); -struct posix_acl *vfs_set_acl_prepare(struct user_namespace *mnt_userns, - struct user_namespace *fs_userns, - const void *value, size_t size); +static inline const char *posix_acl_xattr_name(int type) +{ + switch (type) { + case ACL_TYPE_ACCESS: + return XATTR_NAME_POSIX_ACL_ACCESS; + case ACL_TYPE_DEFAULT: + return XATTR_NAME_POSIX_ACL_DEFAULT; + } + + return ""; +} + +static inline int posix_acl_type(const char *name) +{ + if (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) + return ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0) + return ACL_TYPE_DEFAULT; + + return -1; +} extern const struct xattr_handler posix_acl_access_xattr_handler; extern const struct xattr_handler posix_acl_default_xattr_handler; diff --git a/include/linux/prandom.h b/include/linux/prandom.h index e0a0759dd09c..c94c02ba065c 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -9,6 +9,7 @@ #define _LINUX_PRANDOM_H #include <linux/types.h> +#include <linux/once.h> #include <linux/percpu.h> #include <linux/random.h> @@ -23,24 +24,10 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); #define prandom_init_once(pcpu_state) \ DO_ONCE(prandom_seed_full_state, (pcpu_state)) -/** - * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) - * @ep_ro: right open interval endpoint - * - * Returns a pseudo-random number that is in interval [0, ep_ro). This is - * useful when requesting a random index of an array containing ep_ro elements, - * for example. The result is somewhat biased when ep_ro is not a power of 2, - * so do not use this for cryptographic purposes. - * - * Returns: pseudo-random number in interval [0, ep_ro) - */ +/* Deprecated: use get_random_u32_below() instead. 
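The dentry-based vfs_set_acl()/vfs_get_acl()/vfs_remove_acl() entry points above take the ACL by xattr name, and the posix_acl_xattr_name()/posix_acl_type() helpers translate between those names and the ACL_TYPE_* constants. A hedged sketch of how a caller might combine them (mnt_userns and dentry are assumed to be in scope; error handling abbreviated):

	const char *acl_name = posix_acl_xattr_name(ACL_TYPE_ACCESS);
	struct posix_acl *acl;

	acl = vfs_get_acl(mnt_userns, dentry, acl_name);
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	/* ... inspect the ACL ... */
	posix_acl_release(acl);
	return vfs_remove_acl(mnt_userns, dentry, acl_name);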
*/ static inline u32 prandom_u32_max(u32 ep_ro) { - if (__builtin_constant_p(ep_ro <= 1U << 8) && ep_ro <= 1U << 8) - return (get_random_u8() * ep_ro) >> 8; - if (__builtin_constant_p(ep_ro <= 1U << 16) && ep_ro <= 1U << 16) - return (get_random_u16() * ep_ro) >> 16; - return ((u64)get_random_u32() * ep_ro) >> 32; + return get_random_u32_below(ep_ro); } /* diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 81d6e4ec2294..0260f5ea98fe 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,8 +208,10 @@ static inline void proc_remove(struct proc_dir_entry *de) {} static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; } #define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;}) +#define proc_create_net_data_write(name, mode, parent, ops, write, state_size, data) ({NULL;}) #define proc_create_net(name, mode, parent, state_size, ops) ({NULL;}) #define proc_create_net_single(name, mode, parent, show, data) ({NULL;}) +#define proc_create_net_single_write(name, mode, parent, show, write, data) ({NULL;}) static inline struct pid *tgid_pidfd_to_pid(const struct file *file) { diff --git a/include/linux/property.h b/include/linux/property.h index 117cc200c656..67371c963134 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -50,7 +50,6 @@ int device_property_read_string(struct device *dev, const char *propname, int device_property_match_string(struct device *dev, const char *propname, const char *string); -bool fwnode_device_is_available(const struct fwnode_handle *fwnode); bool fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname); int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode, @@ -72,6 +71,15 @@ int fwnode_property_read_string(const struct fwnode_handle *fwnode, const char *propname, const char **val); int fwnode_property_match_string(const struct fwnode_handle *fwnode, const char *propname, const char *string); + +bool fwnode_device_is_available(const struct fwnode_handle *fwnode); + +static inline +bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char *compat) +{ + return fwnode_property_match_string(fwnode, "compatible", compat) >= 0; +} + int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 6e4372735068..1e0a0d7ace3a 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -72,6 +72,9 @@ enum psi_states { /* Use one bit in the state mask to track TSK_ONCPU */ #define PSI_ONCPU (1 << NR_PSI_STATES) +/* Flag whether to re-arm avgs_work, see details in get_recent_times() */ +#define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1)) + enum psi_aggregators { PSI_AVGS = 0, PSI_POLL, @@ -177,6 +180,7 @@ struct psi_group { struct timer_list poll_timer; wait_queue_head_t poll_wait; atomic_t poll_wakeup; + atomic_t poll_scheduled; /* Protects data used by the monitor */ struct mutex trigger_lock; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 9f16afec7290..9d65ff94e216 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -8,28 +8,7 @@ #ifndef __LINUX_PSTORE_RAM_H__ #define __LINUX_PSTORE_RAM_H__ -#include <linux/compiler.h> -#include <linux/device.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/list.h> #include <linux/pstore.h> -#include 
<linux/types.h> - -/* - * Choose whether access to the RAM zone requires locking or not. If a zone - * can be written to from different CPUs like with ftrace for example, then - * PRZ_FLAG_NO_LOCK is used. For all other cases, locking is required. - */ -#define PRZ_FLAG_NO_LOCK BIT(0) -/* - * If a PRZ should only have a single-boot lifetime, this marks it as - * getting wiped after its contents get copied out after boot. - */ -#define PRZ_FLAG_ZAP_OLD BIT(1) - -struct persistent_ram_buffer; -struct rs_control; struct persistent_ram_ecc_info { int block_size; @@ -39,84 +18,6 @@ struct persistent_ram_ecc_info { uint16_t *par; }; -/** - * struct persistent_ram_zone - Details of a persistent RAM zone (PRZ) - * used as a pstore backend - * - * @paddr: physical address of the mapped RAM area - * @size: size of mapping - * @label: unique name of this PRZ - * @type: frontend type for this PRZ - * @flags: holds PRZ_FLAGS_* bits - * - * @buffer_lock: - * locks access to @buffer "size" bytes and "start" offset - * @buffer: - * pointer to actual RAM area managed by this PRZ - * @buffer_size: - * bytes in @buffer->data (not including any trailing ECC bytes) - * - * @par_buffer: - * pointer into @buffer->data containing ECC bytes for @buffer->data - * @par_header: - * pointer into @buffer->data containing ECC bytes for @buffer header - * (i.e. all fields up to @data) - * @rs_decoder: - * RSLIB instance for doing ECC calculations - * @corrected_bytes: - * ECC corrected bytes accounting since boot - * @bad_blocks: - * ECC uncorrectable bytes accounting since boot - * @ecc_info: - * ECC configuration details - * - * @old_log: - * saved copy of @buffer->data prior to most recent wipe - * @old_log_size: - * bytes contained in @old_log - * - */ -struct persistent_ram_zone { - phys_addr_t paddr; - size_t size; - void *vaddr; - char *label; - enum pstore_type_id type; - u32 flags; - - raw_spinlock_t buffer_lock; - struct persistent_ram_buffer *buffer; - size_t buffer_size; - - char *par_buffer; - char *par_header; - struct rs_control *rs_decoder; - int corrected_bytes; - int bad_blocks; - struct persistent_ram_ecc_info ecc_info; - - char *old_log; - size_t old_log_size; -}; - -struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, - u32 sig, struct persistent_ram_ecc_info *ecc_info, - unsigned int memtype, u32 flags, char *label); -void persistent_ram_free(struct persistent_ram_zone *prz); -void persistent_ram_zap(struct persistent_ram_zone *prz); - -int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, - unsigned int count); -int persistent_ram_write_user(struct persistent_ram_zone *prz, - const void __user *s, unsigned int count); - -void persistent_ram_save_old(struct persistent_ram_zone *prz); -size_t persistent_ram_old_size(struct persistent_ram_zone *prz); -void *persistent_ram_old(struct persistent_ram_zone *prz); -void persistent_ram_free_old(struct persistent_ram_zone *prz); -ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz, - char *str, size_t len); - /* * Ramoops platform data * @mem_size memory size for ramoops diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 92b44161408e..fdffa6a98d79 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -45,6 +45,8 @@ struct system_device_crosststamp; /** * struct ptp_system_timestamp - system time corresponding to a PHC timestamp + * @pre_ts: system timestamp before capturing PHC + * @post_ts: system timestamp after capturing PHC */ 
struct ptp_system_timestamp { struct timespec64 pre_ts; @@ -75,12 +77,6 @@ struct ptp_system_timestamp { * nominal frequency in parts per million, but with a * 16 bit binary fractional field. * - * @adjfreq: Adjusts the frequency of the hardware clock. - * This method is deprecated. New drivers should implement - * the @adjfine method instead. - * parameter delta: Desired frequency offset from nominal frequency - * in parts per billion - * * @adjphase: Adjusts the phase offset of the hardware clock. * parameter delta: Desired change in nanoseconds. * @@ -172,7 +168,6 @@ struct ptp_clock_info { int pps; struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); - int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta); int (*adjphase)(struct ptp_clock_info *ptp, s32 phase); int (*adjtime)(struct ptp_clock_info *ptp, s64 delta); int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts); @@ -246,6 +241,52 @@ static inline long scaled_ppm_to_ppb(long ppm) return (long)ppb; } +/** + * diff_by_scaled_ppm - Calculate difference using scaled ppm + * @base: the base increment value to adjust + * @scaled_ppm: scaled parts per million to adjust by + * @diff: on return, the absolute value of calculated diff + * + * Calculate the difference to adjust the base increment using scaled parts + * per million. + * + * Use mul_u64_u64_div_u64 to perform the difference calculation to avoid + * possible overflow. + * + * Returns: true if scaled_ppm is negative, false otherwise + */ +static inline bool diff_by_scaled_ppm(u64 base, long scaled_ppm, u64 *diff) +{ + bool negative = false; + + if (scaled_ppm < 0) { + negative = true; + scaled_ppm = -scaled_ppm; + } + + *diff = mul_u64_u64_div_u64(base, (u64)scaled_ppm, 1000000ULL << 16); + + return negative; +} + +/** + * adjust_by_scaled_ppm - Adjust a base increment by scaled parts per million + * @base: the base increment value to adjust + * @scaled_ppm: scaled parts per million frequency adjustment + * + * Helper function which calculates a new increment value based on the + * requested scaled parts per million adjustment. + */ +static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm) +{ + u64 diff; + + if (diff_by_scaled_ppm(base, scaled_ppm, &diff)) + return base - diff; + + return base + diff; +} + #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) /** @@ -316,6 +357,11 @@ int ptp_find_pin(struct ptp_clock *ptp, * should most likely call ptp_find_pin() directly from their * ptp_clock_info::enable() method. * +* @ptp: The clock obtained from ptp_clock_register(). +* @func: One of the ptp_pin_function enumerated values. +* @chan: The particular functional channel to find. +* Return: Pin index in the range of zero to ptp_clock_caps.n_pins - 1, +* or -1 if the auxiliary function cannot be found. */ int ptp_find_pin_unlocked(struct ptp_clock *ptp, diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index c952c5ba8fab..eaaef3ffec22 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -389,15 +389,6 @@ static inline void user_single_step_report(struct pt_regs *regs) #define current_pt_regs() task_pt_regs(current) #endif -/* - * unlike current_pt_regs(), this one is equal to task_pt_regs(current) - * on *all* architectures; the only reason to have a per-arch definition - * is optimisation.
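With the deprecated @adjfreq callback removed above, drivers implement @adjfine and can lean on adjust_by_scaled_ppm() for the arithmetic. A hedged sketch of such a callback; FOO_BASE_INCVAL and foo_write_incval() are invented stand-ins for device-specific details:

/* Sketch only: compute and program a new clock increment value. */
static int foo_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
{
	u64 incval = adjust_by_scaled_ppm(FOO_BASE_INCVAL, scaled_ppm);

	return foo_write_incval(ptp, incval);
}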
- */ -#ifndef signal_pt_regs -#define signal_pt_regs() task_pt_regs(current) -#endif - #ifndef current_user_stack_pointer #define current_user_stack_pointer() user_stack_pointer(current_pt_regs()) #endif diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d70c6e5a839d..bba492eea96c 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -478,6 +478,11 @@ static inline int pwmchip_remove(struct pwm_chip *chip) return -EINVAL; } +static inline int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip) +{ + return -EINVAL; +} + static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, unsigned int index, const char *label) diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index a3fec2de512f..cd1973e6ac4b 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -229,6 +229,7 @@ enum pxa_ssp_type { LPSS_SPT_SSP, LPSS_BXT_SSP, LPSS_CNL_SSP, + SSP_MAX }; struct ssp_device { diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index d6e5a1feb947..f29aaaf2eb21 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -10,17 +10,9 @@ #ifdef __KERNEL__ -/* Set to 1 to use kernel-wide empty_zero_page */ -#define RAID6_USE_EMPTY_ZERO_PAGE 0 #include <linux/blkdev.h> -/* We need a pre-zeroed page... if we don't want to use the kernel-provided - one define it here */ -#if RAID6_USE_EMPTY_ZERO_PAGE -# define raid6_empty_zero_page empty_zero_page -#else extern const char raid6_empty_zero_page[PAGE_SIZE]; -#endif #else /* ! __KERNEL__ */ /* Used for testing in user space */ diff --git a/include/linux/random.h b/include/linux/random.h index 147a5e0d0b8e..4a2a1de423cd 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -6,7 +6,6 @@ #include <linux/bug.h> #include <linux/kernel.h> #include <linux/list.h> -#include <linux/once.h> #include <uapi/linux/random.h> @@ -17,16 +16,16 @@ void __init add_bootloader_randomness(const void *buf, size_t len); void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; void add_interrupt_randomness(int irq) __latent_entropy; -void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy, bool sleep_after); -#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) { +#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); -} #else -static inline void add_latent_entropy(void) { } + add_device_randomness(NULL, 0); #endif +} #if IS_ENABLED(CONFIG_VMGENID) void add_vmfork_randomness(const void *unique_vm_id, size_t len); @@ -51,29 +50,76 @@ static inline unsigned long get_random_long(void) #endif } +u32 __get_random_u32_below(u32 ceil); + /* - * On 64-bit architectures, protect against non-terminated C string overflows - * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. + * Returns a random integer in the interval [0, ceil), with uniform + * distribution, suitable for all uses. Fastest when ceil is a constant, but + * still fast for variable ceil as well. 
*/ -#ifdef CONFIG_64BIT -# ifdef __LITTLE_ENDIAN -# define CANARY_MASK 0xffffffffffffff00UL -# else /* big endian, 64 bits: */ -# define CANARY_MASK 0x00ffffffffffffffUL -# endif -#else /* 32 bits: */ -# define CANARY_MASK 0xffffffffUL -#endif +static inline u32 get_random_u32_below(u32 ceil) +{ + if (!__builtin_constant_p(ceil)) + return __get_random_u32_below(ceil); + + /* + * For the fast path, below, all operations on ceil are precomputed by + * the compiler, so this incurs no overhead for checking pow2, doing + * divisions, or branching based on integer size. The resultant + * algorithm does traditional reciprocal multiplication (typically + * optimized by the compiler into shifts and adds), rejecting samples + * whose lower half would indicate a range indivisible by ceil. + */ + BUILD_BUG_ON_MSG(!ceil, "get_random_u32_below() must take ceil > 0"); + if (ceil <= 1) + return 0; + for (;;) { + if (ceil <= 1U << 8) { + u32 mult = ceil * get_random_u8(); + if (likely(is_power_of_2(ceil) || (u8)mult >= (1U << 8) % ceil)) + return mult >> 8; + } else if (ceil <= 1U << 16) { + u32 mult = ceil * get_random_u16(); + if (likely(is_power_of_2(ceil) || (u16)mult >= (1U << 16) % ceil)) + return mult >> 16; + } else { + u64 mult = (u64)ceil * get_random_u32(); + if (likely(is_power_of_2(ceil) || (u32)mult >= -ceil % ceil)) + return mult >> 32; + } + } +} + +/* + * Returns a random integer in the interval (floor, U32_MAX], with uniform + * distribution, suitable for all uses. Fastest when floor is a constant, but + * still fast for variable floor as well. + */ +static inline u32 get_random_u32_above(u32 floor) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && floor == U32_MAX, + "get_random_u32_above() must take floor < U32_MAX"); + return floor + 1 + get_random_u32_below(U32_MAX - floor); +} -static inline unsigned long get_random_canary(void) +/* + * Returns a random integer in the interval [floor, ceil], with uniform + * distribution, suitable for all uses. Fastest when floor and ceil are + * constant, but still fast for variable floor and ceil as well. + */ +static inline u32 get_random_u32_inclusive(u32 floor, u32 ceil) { - return get_random_long() & CANARY_MASK; + BUILD_BUG_ON_MSG(__builtin_constant_p(floor) && __builtin_constant_p(ceil) && + (floor > ceil || ceil - floor == U32_MAX), + "get_random_u32_inclusive() must take floor <= ceil"); + return floor + get_random_u32_below(ceil - floor + 1); } void __init random_init_early(const char *command_line); void __init random_init(void); bool rng_is_initialized(void); int wait_for_random_bytes(void); +int execute_with_initialized_rng(struct notifier_block *nb); /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). * Returns the result of the call to wait_for_random_bytes. */ @@ -108,26 +154,6 @@ declare_get_random_var_wait(long, unsigned long) #include <asm/archrandom.h> -/* - * Called from the boot CPU during startup; not valid to call once - * secondary CPUs are up and preemption is possible. 
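The three bounded helpers above replace the old open-coded multiply-shift idiom. A brief usage sketch; nr_items is an assumed non-zero variable:

	u32 idx  = get_random_u32_below(nr_items);        /* [0, nr_items) */
	u32 port = get_random_u32_inclusive(1024, 65535); /* [1024, 65535] */
	u32 nz   = get_random_u32_above(0);               /* [1, U32_MAX]  */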
- */ -#ifndef arch_get_random_seed_longs_early -static inline size_t __init arch_get_random_seed_longs_early(unsigned long *v, size_t max_longs) -{ - WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_seed_longs(v, max_longs); -} -#endif - -#ifndef arch_get_random_longs_early -static inline bool __init arch_get_random_longs_early(unsigned long *v, size_t max_longs) -{ - WARN_ON(system_state != SYSTEM_BOOTING); - return arch_get_random_longs(v, max_longs); -} -#endif - #ifdef CONFIG_SMP int random_prepare_cpu(unsigned int cpu); int random_online_cpu(unsigned int cpu); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d..03abf883a281 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_RCU_LAZY +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); +#else +static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + call_rcu(head, func); +} +#endif + /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active; @@ -241,6 +250,18 @@ static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ /** + * rcu_trace_implies_rcu_gp - does an RCU Tasks Trace grace period imply an RCU grace period? + * + * As an accident of implementation, an RCU Tasks Trace grace period also + * acts as an RCU grace period. However, this could change at any time. + * Code relying on this accident must call this function to verify that + * this accident is still happening. + * + * You have been warned! + */ +static inline bool rcu_trace_implies_rcu_gp(void) { return true; } + +/** * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU * * This macro resembles cond_resched(), except that it is defined to @@ -340,6 +361,11 @@ static inline int rcu_read_lock_any_held(void) return !preemptible(); } +static inline int debug_lockdep_rcu_enabled(void) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 768196a5f39d..68f9070aa111 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -142,23 +142,17 @@ static inline int rcu_needs_cpu(void) * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. 
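rcu_trace_implies_rcu_gp(), added above, lets an RCU Tasks Trace callback skip a second grace period only while the documented accident still holds. A hedged sketch of the intended pattern; foo_free() is an invented rcu_callback_t:

/* Sketch: free after a Tasks Trace GP, adding a normal RCU GP
 * only when the trace GP does not already imply one. */
static void foo_tasks_trace_cb(struct rcu_head *rhp)
{
	if (rcu_trace_implies_rcu_gp())
		foo_free(rhp);
	else
		call_rcu(rhp, foo_free);
}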
*/ -static inline void rcu_virt_note_context_switch(int cpu) { } +static inline void rcu_virt_note_context_switch(void) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } static inline void rcu_irq_exit_check_preempt(void) { } -#define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { return false; } static inline void rcu_preempt_deferred_qs(struct task_struct *t) { } -#ifdef CONFIG_SRCU void rcu_scheduler_starting(void); -#else /* #ifndef CONFIG_SRCU */ -static inline void rcu_scheduler_starting(void) { } -#endif /* #else #ifndef CONFIG_SRCU */ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5efb51486e8a..4003bf6cfa1c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void); * wrapper around rcu_note_context_switch(), which allows TINY_RCU * to save a few bytes. The caller must have disabled interrupts. */ -static inline void rcu_virt_note_context_switch(int cpu) +static inline void rcu_virt_note_context_switch(void) { rcu_note_context_switch(false); } @@ -87,8 +87,6 @@ bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); -bool rcu_is_idle_cpu(int cpu); - #ifdef CONFIG_PROVE_RCU void rcu_irq_exit_check_preempt(void); #else diff --git a/include/linux/regmap.h b/include/linux/regmap.h index ca3434dca3a0..a3bc695bcca0 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -24,6 +24,7 @@ struct module; struct clk; struct device; struct device_node; +struct fsi_device; struct i2c_client; struct i3c_device; struct irq_domain; @@ -628,6 +629,10 @@ struct regmap *__regmap_init_spi_avmm(struct spi_device *spi, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__regmap_init_fsi(struct fsi_device *fsi_dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); struct regmap *__devm_regmap_init(struct device *dev, const struct regmap_bus *bus, @@ -693,6 +698,11 @@ struct regmap *__devm_regmap_init_spi_avmm(struct spi_device *spi, const struct regmap_config *config, struct lock_class_key *lock_key, const char *lock_name); +struct regmap *__devm_regmap_init_fsi(struct fsi_device *fsi_dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); + /* * Wrapper for regmap_init macros to include a unique lockdep key and name * for each call. No-op if CONFIG_LOCKDEP is not set. @@ -920,6 +930,19 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); spi, config) /** + * regmap_init_fsi() - Initialise register map + * + * @fsi_dev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer to + * a struct regmap. 
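The new FSI regmap constructors follow the usual regmap init pattern, as the macros below show. A hedged sketch of a probe-time caller; foo_regmap_setup() and the register widths are assumptions for illustration:

static int foo_regmap_setup(struct fsi_device *fsi_dev, struct regmap **map)
{
	static const struct regmap_config cfg = {
		.reg_bits = 32,	/* assumed layout */
		.val_bits = 32,
	};

	/* devm-managed: the regmap is freed with the device */
	*map = devm_regmap_init_fsi(fsi_dev, &cfg);
	return PTR_ERR_OR_ZERO(*map);
}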
+ */ +#define regmap_init_fsi(fsi_dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_fsi, #config, fsi_dev, \ + config) + +/** * devm_regmap_init() - Initialise managed register map * * @dev: Device that will be interacted with @@ -1148,6 +1171,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); __regmap_lockdep_wrapper(__devm_regmap_init_spi_avmm, #config, \ spi, config) +/** + * devm_regmap_init_fsi() - Initialise managed register map + * + * @fsi_dev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_fsi(fsi_dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_fsi, #config, \ + fsi_dev, config) + int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk); void regmap_mmio_detach_clk(struct regmap *map); void regmap_exit(struct regmap *map); @@ -1219,6 +1256,7 @@ static inline int regmap_write_bits(struct regmap *map, unsigned int reg, int regmap_get_val_bytes(struct regmap *map); int regmap_get_max_register(struct regmap *map); int regmap_get_reg_stride(struct regmap *map); +bool regmap_might_sleep(struct regmap *map); int regmap_async_complete(struct regmap *map); bool regmap_can_raw_write(struct regmap *map); size_t regmap_get_raw_read_max(struct regmap *map); @@ -1542,6 +1580,8 @@ struct regmap_irq_chip_data; * before regmap_irq_handler process the interrupts. * @handle_post_irq: Driver specific callback to handle interrupt from device * after handling the interrupts in regmap_irq_handler(). + * @handle_mask_sync: Callback used to handle IRQ mask syncs. The index will be + * in the range [0, num_regs) * @set_type_virt: Driver specific callback to extend regmap_irq_set_type() * and configure virt regs. Deprecated, use @set_type_config * callback and config registers instead. @@ -1603,6 +1643,9 @@ struct regmap_irq_chip { int (*handle_pre_irq)(void *irq_drv_data); int (*handle_post_irq)(void *irq_drv_data); + int (*handle_mask_sync)(struct regmap *map, int index, + unsigned int mask_buf_def, + unsigned int mask_buf, void *irq_drv_data); int (*set_type_virt)(unsigned int **buf, unsigned int type, unsigned long hwirq, int reg); int (*set_type_config)(unsigned int **buf, unsigned int type, @@ -1905,6 +1948,12 @@ static inline int regmap_get_reg_stride(struct regmap *map) return -EINVAL; } +static inline bool regmap_might_sleep(struct regmap *map) +{ + WARN_ONCE(1, "regmap API is disabled"); + return true; +} + static inline int regcache_sync(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); diff --git a/include/linux/regset.h b/include/linux/regset.h index a00765f0e8cf..9061266dd8de 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -275,15 +275,15 @@ static inline int user_regset_copyin(unsigned int *pos, unsigned int *count, return 0; } -static inline int user_regset_copyin_ignore(unsigned int *pos, - unsigned int *count, - const void **kbuf, - const void __user **ubuf, - const int start_pos, - const int end_pos) +static inline void user_regset_copyin_ignore(unsigned int *pos, + unsigned int *count, + const void **kbuf, + const void __user **ubuf, + const int start_pos, + const int end_pos) { if (*count == 0) - return 0; + return; BUG_ON(*pos < start_pos); if (end_pos < 0 || *pos < end_pos) { unsigned int copy = (end_pos < 0 ? 
*count @@ -295,7 +295,6 @@ static inline int user_regset_copyin_ignore(unsigned int *pos, *pos += copy; *count -= copy; } - return 0; } extern int regset_get(struct task_struct *target, diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index ee3b4a014611..39b666b40ea6 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -244,9 +244,13 @@ int regulator_disable_deferred(struct regulator *regulator, int ms); int __must_check regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); +int __must_check of_regulator_bulk_get_all(struct device *dev, struct device_node *np, + struct regulator_bulk_data **consumers); int __must_check devm_regulator_bulk_get(struct device *dev, int num_consumers, struct regulator_bulk_data *consumers); void devm_regulator_bulk_put(struct regulator_bulk_data *consumers); +int __must_check devm_regulator_bulk_get_exclusive(struct device *dev, int num_consumers, + struct regulator_bulk_data *consumers); int __must_check devm_regulator_bulk_get_const( struct device *dev, int num_consumers, const struct regulator_bulk_data *in_consumers, @@ -479,6 +483,12 @@ static inline int devm_regulator_bulk_get(struct device *dev, int num_consumers, return 0; } +static inline int of_regulator_bulk_get_all(struct device *dev, struct device_node *np, + struct regulator_bulk_data **consumers) +{ + return 0; +} + static inline int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index f9a7461e72b8..d3b4a3d4514a 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -687,7 +687,8 @@ static inline int regulator_err2notif(int err) struct regulator_dev * -regulator_register(const struct regulator_desc *regulator_desc, +regulator_register(struct device *dev, + const struct regulator_desc *regulator_desc, const struct regulator_config *config); struct regulator_dev * devm_regulator_register(struct device *dev, diff --git a/include/linux/regulator/mt6357-regulator.h b/include/linux/regulator/mt6357-regulator.h new file mode 100644 index 000000000000..238b1ee77ea6 --- /dev/null +++ b/include/linux/regulator/mt6357-regulator.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 MediaTek Inc. 
+ */ + +#ifndef __LINUX_REGULATOR_MT6357_H +#define __LINUX_REGULATOR_MT6357_H + +enum { + /* Bucks */ + MT6357_ID_VCORE, + MT6357_ID_VMODEM, + MT6357_ID_VPA, + MT6357_ID_VPROC, + MT6357_ID_VS1, + + /* LDOs */ + MT6357_ID_VAUX18, + MT6357_ID_VAUD28, + MT6357_ID_VCAMA, + MT6357_ID_VCAMD, + MT6357_ID_VCAMIO, + MT6357_ID_VCN18, + MT6357_ID_VCN28, + MT6357_ID_VCN33_BT, + MT6357_ID_VCN33_WIFI, + MT6357_ID_VDRAM, + MT6357_ID_VEFUSE, + MT6357_ID_VEMC, + MT6357_ID_VFE28, + MT6357_ID_VIBR, + MT6357_ID_VIO18, + MT6357_ID_VIO28, + MT6357_ID_VLDO28, + MT6357_ID_VMC, + MT6357_ID_VMCH, + MT6357_ID_VRF12, + MT6357_ID_VRF18, + MT6357_ID_VSIM1, + MT6357_ID_VSIM2, + MT6357_ID_VSRAM_OTHERS, + MT6357_ID_VSRAM_PROC, + MT6357_ID_VUSB33, + MT6357_ID_VXO22, + + MT6357_ID_RG_MAX, +}; + +#define MT6357_MAX_REGULATOR MT6357_ID_RG_MAX + +#endif /* __LINUX_REGULATOR_MT6357_H */ diff --git a/include/linux/regulator/userspace-consumer.h b/include/linux/regulator/userspace-consumer.h index b5dba0628951..2249ee697f8b 100644 --- a/include/linux/regulator/userspace-consumer.h +++ b/include/linux/regulator/userspace-consumer.h @@ -21,6 +21,7 @@ struct regulator_userspace_consumer_data { struct regulator_bulk_data *supplies; bool init_on; + bool no_autoswitch; }; #endif /* __REGULATOR_PLATFORM_CONSUMER_H_ */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 0cf5b20c6ddf..0cee154abc9f 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -89,11 +89,12 @@ struct rdt_domain { /** * struct resctrl_cache - Cache allocation related data * @cbm_len: Length of the cache bit mask - * @min_cbm_bits: Minimum number of consecutive bits to be set + * @min_cbm_bits: Minimum number of consecutive bits to be set. + * The value 0 means the architecture can support + * zero CBM. * @shareable_bits: Bitmask of shareable resource with other * executing entities * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. - * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache * level has CPU scope. */ @@ -102,7 +103,6 @@ struct resctrl_cache { unsigned int min_cbm_bits; unsigned int shareable_bits; bool arch_has_sparse_bitmaps; - bool arch_has_empty_bitmaps; bool arch_has_per_cpu_cfg; }; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 68dab3e08aad..5b5357c0bd8c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -323,29 +323,36 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert( * When we write to a bucket without unlocking, we use rht_assign_locked(). 
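As the rhashtable hunks that follow show, the bucket locks stop disabling BH and instead save and restore IRQ flags, so every locker now threads a flags word through the critical section. A minimal sketch of the new convention:

	unsigned long flags;

	flags = rht_lock(tbl, bkt);
	/* ... modify the bucket chain ... */
	rht_unlock(tbl, bkt, flags);
	/* or, when publishing a new head:
	 * rht_assign_unlock(tbl, bkt, obj, flags);
	 */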
*/ -static inline void rht_lock(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bkt) +static inline unsigned long rht_lock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt) { - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bkt); lock_map_acquire(&tbl->dep_map); + return flags; } -static inline void rht_lock_nested(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bucket, - unsigned int subclass) +static inline unsigned long rht_lock_nested(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bucket, + unsigned int subclass) { - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bucket); lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); + return flags; } static inline void rht_unlock(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bkt) + struct rhash_lock_head __rcu **bkt, + unsigned long flags) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); - local_bh_enable(); + local_irq_restore(flags); } static inline struct rhash_head *__rht_ptr( @@ -393,7 +400,8 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, static inline void rht_assign_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, - struct rhash_head *obj) + struct rhash_head *obj, + unsigned long flags) { if (rht_is_a_nulls(obj)) obj = NULL; @@ -401,7 +409,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); - local_bh_enable(); + local_irq_restore(flags); } /** @@ -706,6 +714,7 @@ static inline void *__rhashtable_insert_fast( struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; + unsigned long flags; unsigned int hash; int elasticity; void *data; @@ -720,11 +729,11 @@ static inline void *__rhashtable_insert_fast( if (!bkt) goto out; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } @@ -756,9 +765,9 @@ slow_path: RCU_INIT_POINTER(list->rhead.next, head); if (pprev) { rcu_assign_pointer(*pprev, obj); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, bkt, obj, flags); data = NULL; goto out; } @@ -785,7 +794,7 @@ slow_path: } atomic_inc(&ht->nelems); - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, bkt, obj, flags); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); @@ -797,7 +806,7 @@ out: return data; out_unlock: - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); goto out; } @@ -991,6 +1000,7 @@ static inline int __rhashtable_remove_fast_one( struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; + unsigned long flags; unsigned int hash; int err = -ENOENT; @@ -999,7 +1009,7 @@ static inline int __rhashtable_remove_fast_one( if (!bkt) return -ENOENT; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; @@ -1043,14 +1053,14 @@ static inline int __rhashtable_remove_fast_one( if (pprev) { rcu_assign_pointer(*pprev, obj); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else { - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, 
bkt, obj, flags); } goto unlocked; } - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); unlocked: if (err > 0) { atomic_dec(&ht->nelems); @@ -1143,6 +1153,7 @@ static inline int __rhashtable_replace_fast( struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; + unsigned long flags; unsigned int hash; int err = -ENOENT; @@ -1158,7 +1169,7 @@ static inline int __rhashtable_replace_fast( return -ENOENT; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { @@ -1169,15 +1180,15 @@ static inline int __rhashtable_replace_fast( rcu_assign_pointer(obj_new->next, obj_old->next); if (pprev) { rcu_assign_pointer(*pprev, obj_new); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else { - rht_assign_unlock(tbl, bkt, obj_new); + rht_assign_unlock(tbl, bkt, obj_new, flags); } err = 0; goto unlocked; } - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); unlocked: return err; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 2504df9a0453..3c7d295746f6 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table); + struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index ae2c6a3cec5d..92ad75549e9c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -12,21 +12,22 @@ extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, - u32 group, struct nlmsghdr *nlh, gfp_t flags); + u32 group, const struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); -void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, + u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, - int new_ifindex); + int new_ifindex, u32 portid, u32 seq); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, - gfp_t flags); + gfp_t flags, u32 portid, const struct nlmsghdr *nlh); /* RTNL is used as a global lock for all changes to network configuration */ diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4d2d5205ab58..d662cf136021 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -87,11 +87,6 @@ struct sbitmap { */ struct sbq_wait_state { /** - * @wait_cnt: Number of frees remaining before we wake up. - */ - atomic_t wait_cnt; - - /** * @wait: Wait queue. 
*/ wait_queue_head_t wait; @@ -138,6 +133,17 @@ struct sbitmap_queue { * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; + + /** + * @completion_cnt: Number of bits cleared passed to the + * wakeup function. + */ + atomic_t completion_cnt; + + /** + * @wakeup_cnt: Number of thread wake ups issued. + */ + atomic_t wakeup_cnt; }; /** diff --git a/include/linux/sched.h b/include/linux/sched.h index ffb6eb55cd13..853d08f7562b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -870,9 +870,6 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; -#ifdef SPLIT_RSS_COUNTING - struct task_rss_stat rss_stat; -#endif int exit_state; int exit_code; int exit_signal; @@ -888,9 +885,6 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; -#ifdef CONFIG_PSI - unsigned sched_psi_wake_requeue:1; -#endif /* Force alignment to the next boundary: */ unsigned :0; @@ -1243,7 +1237,7 @@ struct task_struct { unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; + struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 303ee7dd0c7e..5a64582b086b 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -4,8 +4,6 @@ #include <linux/types.h> -struct ctl_table; - #ifdef CONFIG_DETECT_HUNG_TASK /* used for hung_task and block/ */ extern unsigned long sysctl_hung_task_timeout_secs; @@ -27,12 +25,8 @@ enum sched_tunable_scaling { #ifdef CONFIG_NUMA_BALANCING extern int sysctl_numa_balancing_mode; -extern unsigned int sysctl_numa_balancing_promote_rate_limit; #else #define sysctl_numa_balancing_mode 0 #endif -int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos); - #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index f054d0360a75..4cc52698e214 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -25,7 +25,7 @@ struct user_struct { #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \ defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \ - defined(CONFIG_VFIO_PCI_ZDEV_KVM) + defined(CONFIG_VFIO_PCI_ZDEV_KVM) || IS_ENABLED(CONFIG_IOMMUFD) atomic_long_t locked_vm; #endif #ifdef CONFIG_WATCH_QUEUE diff --git a/include/linux/scs.h b/include/linux/scs.h index 18122d9e17ff..4ab5bdc898cf 100644 --- a/include/linux/scs.h +++ b/include/linux/scs.h @@ -53,6 +53,22 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk) return sz >= SCS_SIZE - 1 || READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC; } +DECLARE_STATIC_KEY_FALSE(dynamic_scs_enabled); + +static inline bool scs_is_dynamic(void) +{ + if (!IS_ENABLED(CONFIG_DYNAMIC_SCS)) + return false; + return static_branch_likely(&dynamic_scs_enabled); +} + +static inline bool scs_is_enabled(void) +{ + if (!IS_ENABLED(CONFIG_DYNAMIC_SCS)) + return true; + return scs_is_dynamic(); +} + #else /* CONFIG_SHADOW_CALL_STACK */ static inline void *scs_alloc(int node) { return NULL; } @@ -62,6 +78,8 @@ static inline void scs_task_reset(struct task_struct *tsk) {} static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } static inline void scs_release(struct task_struct *tsk) {} static inline bool task_scs_end_corrupted(struct task_struct *tsk) { return false; } +static inline bool 
scs_is_enabled(void) { return false; } +static inline bool scs_is_dynamic(void) { return false; } #endif /* CONFIG_SHADOW_CALL_STACK */ diff --git a/include/linux/sctp.h b/include/linux/sctp.h index a86e852507b3..358dc08e0831 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -820,4 +820,9 @@ struct sctp_new_encap_port_hdr { __be16 new_port; }; +/* Round an int up to the next multiple of 4. */ +#define SCTP_PAD4(s) (((s)+3)&~3) +/* Truncate to the previous multiple of 4. */ +#define SCTP_TRUNC4(s) ((s)&~3) + #endif /* __LINUX_SCTP_H__ */ diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index d31d76be4982..175079552f68 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -27,6 +27,7 @@ struct seccomp_filter; * * @mode: indicates one of the valid values above for controlled * system calls available to a process. + * @filter_count: number of seccomp filters * @filter: must always point to a valid seccomp-filter or NULL as it is * accessed without locking during system call entry. * diff --git a/include/linux/security.h b/include/linux/security.h index ca1b7109c0db..5b67f208f7de 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include <linux/err.h> #include <linux/string.h> #include <linux/mm.h> +#include <linux/sockptr.h> struct linux_binprm; struct cred; @@ -361,6 +362,13 @@ int security_inode_getattr(const struct path *path); int security_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry, const char *name, const void *value, size_t size, int flags); +int security_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name, + struct posix_acl *kacl); +int security_inode_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name); +int security_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *acl_name); void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int security_inode_getxattr(struct dentry *dentry, const char *name); @@ -396,6 +404,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); int security_file_open(struct file *file); +int security_file_truncate(struct file *file); int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); @@ -872,6 +881,28 @@ static inline int security_inode_setxattr(struct user_namespace *mnt_userns, return cap_inode_setxattr(dentry, name, value, size, flags); } +static inline int security_inode_set_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name, + struct posix_acl *kacl) +{ + return 0; +} + +static inline int security_inode_get_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} + +static inline int security_inode_remove_acl(struct user_namespace *mnt_userns, + struct dentry *dentry, + const char *acl_name) +{ + return 0; +} + static inline void security_inode_post_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { } @@ -1014,6 +1045,11 @@ static inline int security_file_open(struct file *file) return 0; } +static inline int security_file_truncate(struct file *file) +{ + return 0; +} + static inline int security_task_alloc(struct 
task_struct *task, unsigned long clone_flags) { @@ -1411,8 +1447,8 @@ int security_socket_getsockopt(struct socket *sock, int level, int optname); int security_socket_setsockopt(struct socket *sock, int level, int optname); int security_socket_shutdown(struct socket *sock, int how); int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb); -int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, - int __user *optlen, unsigned len); +int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, + sockptr_t optlen, unsigned int len); int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid); int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); @@ -1548,8 +1584,10 @@ static inline int security_sock_rcv_skb(struct sock *sk, return 0; } -static inline int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, - int __user *optlen, unsigned len) +static inline int security_socket_getpeersec_stream(struct socket *sock, + sockptr_t optval, + sockptr_t optlen, + unsigned int len) { return -ENOPROTOOPT; } diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 6f837bb6c715..31ac562a17d7 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -11,7 +11,8 @@ #define LINUX_OPAL_H #include <uapi/linux/sed-opal.h> -#include <linux/kernel.h> +#include <linux/compiler_types.h> +#include <linux/types.h> struct opal_dev; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index d657f2a42a7b..91871464b99d 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -743,9 +743,15 @@ static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif -static inline bool uart_console_enabled(struct uart_port *port) +/* Variant of uart_console_registered() when the console_list_lock is held. 
*/ +static inline bool uart_console_registered_locked(struct uart_port *port) { - return uart_console(port) && (port->cons->flags & CON_ENABLED); + return uart_console(port) && console_is_registered_locked(port->cons); +} + +static inline bool uart_console_registered(struct uart_port *port) +{ + return uart_console(port) && console_is_registered(port->cons); } struct uart_port *uart_get_console(struct uart_port *ports, int nr, diff --git a/include/linux/sfp.h b/include/linux/sfp.h index d1f343853b6c..52b98f9666a2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -332,39 +332,33 @@ enum { /* SFP EEPROM registers */ enum { - SFP_PHYS_ID = 0x00, - SFP_PHYS_EXT_ID = 0x01, - SFP_CONNECTOR = 0x02, - SFP_COMPLIANCE = 0x03, - SFP_ENCODING = 0x0b, - SFP_BR_NOMINAL = 0x0c, - SFP_RATE_ID = 0x0d, - SFP_LINK_LEN_SM_KM = 0x0e, - SFP_LINK_LEN_SM_100M = 0x0f, - SFP_LINK_LEN_50UM_OM2_10M = 0x10, - SFP_LINK_LEN_62_5UM_OM1_10M = 0x11, - SFP_LINK_LEN_COPPER_1M = 0x12, - SFP_LINK_LEN_50UM_OM4_10M = 0x12, - SFP_LINK_LEN_50UM_OM3_10M = 0x13, - SFP_VENDOR_NAME = 0x14, - SFP_VENDOR_OUI = 0x25, - SFP_VENDOR_PN = 0x28, - SFP_VENDOR_REV = 0x38, - SFP_OPTICAL_WAVELENGTH_MSB = 0x3c, - SFP_OPTICAL_WAVELENGTH_LSB = 0x3d, - SFP_CABLE_SPEC = 0x3c, - SFP_CC_BASE = 0x3f, - SFP_OPTIONS = 0x40, /* 2 bytes, MSB, LSB */ - SFP_BR_MAX = 0x42, - SFP_BR_MIN = 0x43, - SFP_VENDOR_SN = 0x44, - SFP_DATECODE = 0x54, - SFP_DIAGMON = 0x5c, - SFP_ENHOPTS = 0x5d, - SFP_SFF8472_COMPLIANCE = 0x5e, - SFP_CC_EXT = 0x5f, + SFP_PHYS_ID = 0, + SFP_PHYS_EXT_ID = 1, SFP_PHYS_EXT_ID_SFP = 0x04, + + SFP_CONNECTOR = 2, + SFP_COMPLIANCE = 3, + SFP_ENCODING = 11, + SFP_BR_NOMINAL = 12, + SFP_RATE_ID = 13, + SFP_LINK_LEN_SM_KM = 14, + SFP_LINK_LEN_SM_100M = 15, + SFP_LINK_LEN_50UM_OM2_10M = 16, + SFP_LINK_LEN_62_5UM_OM1_10M = 17, + SFP_LINK_LEN_COPPER_1M = 18, + SFP_LINK_LEN_50UM_OM4_10M = 18, + SFP_LINK_LEN_50UM_OM3_10M = 19, + SFP_VENDOR_NAME = 20, + SFP_VENDOR_OUI = 37, + SFP_VENDOR_PN = 40, + SFP_VENDOR_REV = 56, + SFP_OPTICAL_WAVELENGTH_MSB = 60, + SFP_OPTICAL_WAVELENGTH_LSB = 61, + SFP_CABLE_SPEC = 60, + SFP_CC_BASE = 63, + + SFP_OPTIONS = 64, /* 2 bytes, MSB, LSB */ SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13), SFP_OPTIONS_PAGING_A2 = BIT(12), SFP_OPTIONS_RETIMER = BIT(11), @@ -378,11 +372,20 @@ enum { SFP_OPTIONS_TX_FAULT = BIT(3), SFP_OPTIONS_LOS_INVERTED = BIT(2), SFP_OPTIONS_LOS_NORMAL = BIT(1), + + SFP_BR_MAX = 66, + SFP_BR_MIN = 67, + SFP_VENDOR_SN = 68, + SFP_DATECODE = 84, + + SFP_DIAGMON = 92, SFP_DIAGMON_DDM = BIT(6), SFP_DIAGMON_INT_CAL = BIT(5), SFP_DIAGMON_EXT_CAL = BIT(4), SFP_DIAGMON_RXPWR_AVG = BIT(3), SFP_DIAGMON_ADDRMODE = BIT(2), + + SFP_ENHOPTS = 93, SFP_ENHOPTS_ALARMWARN = BIT(7), SFP_ENHOPTS_SOFT_TX_DISABLE = BIT(6), SFP_ENHOPTS_SOFT_TX_FAULT = BIT(5), @@ -390,6 +393,8 @@ enum { SFP_ENHOPTS_SOFT_RATE_SELECT = BIT(3), SFP_ENHOPTS_APP_SELECT_SFF8079 = BIT(2), SFP_ENHOPTS_SOFT_RATE_SFF8431 = BIT(1), + + SFP_SFF8472_COMPLIANCE = 94, SFP_SFF8472_COMPLIANCE_NONE = 0x00, SFP_SFF8472_COMPLIANCE_REV9_3 = 0x01, SFP_SFF8472_COMPLIANCE_REV9_5 = 0x02, @@ -399,68 +404,70 @@ enum { SFP_SFF8472_COMPLIANCE_REV11_3 = 0x06, SFP_SFF8472_COMPLIANCE_REV11_4 = 0x07, SFP_SFF8472_COMPLIANCE_REV12_0 = 0x08, + + SFP_CC_EXT = 95, }; /* SFP Diagnostics */ enum { /* Alarm and warnings stored MSB at lower address then LSB */ - SFP_TEMP_HIGH_ALARM = 0x00, - SFP_TEMP_LOW_ALARM = 0x02, - SFP_TEMP_HIGH_WARN = 0x04, - SFP_TEMP_LOW_WARN = 0x06, - SFP_VOLT_HIGH_ALARM = 0x08, - SFP_VOLT_LOW_ALARM = 0x0a, - SFP_VOLT_HIGH_WARN = 0x0c, - SFP_VOLT_LOW_WARN = 0x0e, 
- SFP_BIAS_HIGH_ALARM = 0x10, - SFP_BIAS_LOW_ALARM = 0x12, - SFP_BIAS_HIGH_WARN = 0x14, - SFP_BIAS_LOW_WARN = 0x16, - SFP_TXPWR_HIGH_ALARM = 0x18, - SFP_TXPWR_LOW_ALARM = 0x1a, - SFP_TXPWR_HIGH_WARN = 0x1c, - SFP_TXPWR_LOW_WARN = 0x1e, - SFP_RXPWR_HIGH_ALARM = 0x20, - SFP_RXPWR_LOW_ALARM = 0x22, - SFP_RXPWR_HIGH_WARN = 0x24, - SFP_RXPWR_LOW_WARN = 0x26, - SFP_LASER_TEMP_HIGH_ALARM = 0x28, - SFP_LASER_TEMP_LOW_ALARM = 0x2a, - SFP_LASER_TEMP_HIGH_WARN = 0x2c, - SFP_LASER_TEMP_LOW_WARN = 0x2e, - SFP_TEC_CUR_HIGH_ALARM = 0x30, - SFP_TEC_CUR_LOW_ALARM = 0x32, - SFP_TEC_CUR_HIGH_WARN = 0x34, - SFP_TEC_CUR_LOW_WARN = 0x36, - SFP_CAL_RXPWR4 = 0x38, - SFP_CAL_RXPWR3 = 0x3c, - SFP_CAL_RXPWR2 = 0x40, - SFP_CAL_RXPWR1 = 0x44, - SFP_CAL_RXPWR0 = 0x48, - SFP_CAL_TXI_SLOPE = 0x4c, - SFP_CAL_TXI_OFFSET = 0x4e, - SFP_CAL_TXPWR_SLOPE = 0x50, - SFP_CAL_TXPWR_OFFSET = 0x52, - SFP_CAL_T_SLOPE = 0x54, - SFP_CAL_T_OFFSET = 0x56, - SFP_CAL_V_SLOPE = 0x58, - SFP_CAL_V_OFFSET = 0x5a, - SFP_CHKSUM = 0x5f, - - SFP_TEMP = 0x60, - SFP_VCC = 0x62, - SFP_TX_BIAS = 0x64, - SFP_TX_POWER = 0x66, - SFP_RX_POWER = 0x68, - SFP_LASER_TEMP = 0x6a, - SFP_TEC_CUR = 0x6c, - - SFP_STATUS = 0x6e, + SFP_TEMP_HIGH_ALARM = 0, + SFP_TEMP_LOW_ALARM = 2, + SFP_TEMP_HIGH_WARN = 4, + SFP_TEMP_LOW_WARN = 6, + SFP_VOLT_HIGH_ALARM = 8, + SFP_VOLT_LOW_ALARM = 10, + SFP_VOLT_HIGH_WARN = 12, + SFP_VOLT_LOW_WARN = 14, + SFP_BIAS_HIGH_ALARM = 16, + SFP_BIAS_LOW_ALARM = 18, + SFP_BIAS_HIGH_WARN = 20, + SFP_BIAS_LOW_WARN = 22, + SFP_TXPWR_HIGH_ALARM = 24, + SFP_TXPWR_LOW_ALARM = 26, + SFP_TXPWR_HIGH_WARN = 28, + SFP_TXPWR_LOW_WARN = 30, + SFP_RXPWR_HIGH_ALARM = 32, + SFP_RXPWR_LOW_ALARM = 34, + SFP_RXPWR_HIGH_WARN = 36, + SFP_RXPWR_LOW_WARN = 38, + SFP_LASER_TEMP_HIGH_ALARM = 40, + SFP_LASER_TEMP_LOW_ALARM = 42, + SFP_LASER_TEMP_HIGH_WARN = 44, + SFP_LASER_TEMP_LOW_WARN = 46, + SFP_TEC_CUR_HIGH_ALARM = 48, + SFP_TEC_CUR_LOW_ALARM = 50, + SFP_TEC_CUR_HIGH_WARN = 52, + SFP_TEC_CUR_LOW_WARN = 54, + SFP_CAL_RXPWR4 = 56, + SFP_CAL_RXPWR3 = 60, + SFP_CAL_RXPWR2 = 64, + SFP_CAL_RXPWR1 = 68, + SFP_CAL_RXPWR0 = 72, + SFP_CAL_TXI_SLOPE = 76, + SFP_CAL_TXI_OFFSET = 78, + SFP_CAL_TXPWR_SLOPE = 80, + SFP_CAL_TXPWR_OFFSET = 82, + SFP_CAL_T_SLOPE = 84, + SFP_CAL_T_OFFSET = 86, + SFP_CAL_V_SLOPE = 88, + SFP_CAL_V_OFFSET = 90, + SFP_CHKSUM = 95, + + SFP_TEMP = 96, + SFP_VCC = 98, + SFP_TX_BIAS = 100, + SFP_TX_POWER = 102, + SFP_RX_POWER = 104, + SFP_LASER_TEMP = 106, + SFP_TEC_CUR = 108, + + SFP_STATUS = 110, SFP_STATUS_TX_DISABLE = BIT(7), SFP_STATUS_TX_DISABLE_FORCE = BIT(6), SFP_STATUS_TX_FAULT = BIT(2), SFP_STATUS_RX_LOS = BIT(1), - SFP_ALARM0 = 0x70, + SFP_ALARM0 = 112, SFP_ALARM0_TEMP_HIGH = BIT(7), SFP_ALARM0_TEMP_LOW = BIT(6), SFP_ALARM0_VCC_HIGH = BIT(5), @@ -470,11 +477,11 @@ enum { SFP_ALARM0_TXPWR_HIGH = BIT(1), SFP_ALARM0_TXPWR_LOW = BIT(0), - SFP_ALARM1 = 0x71, + SFP_ALARM1 = 113, SFP_ALARM1_RXPWR_HIGH = BIT(7), SFP_ALARM1_RXPWR_LOW = BIT(6), - SFP_WARN0 = 0x74, + SFP_WARN0 = 116, SFP_WARN0_TEMP_HIGH = BIT(7), SFP_WARN0_TEMP_LOW = BIT(6), SFP_WARN0_VCC_HIGH = BIT(5), @@ -484,13 +491,15 @@ enum { SFP_WARN0_TXPWR_HIGH = BIT(1), SFP_WARN0_TXPWR_LOW = BIT(0), - SFP_WARN1 = 0x75, + SFP_WARN1 = 117, SFP_WARN1_RXPWR_HIGH = BIT(7), SFP_WARN1_RXPWR_LOW = BIT(6), - SFP_EXT_STATUS = 0x76, - SFP_VSL = 0x78, - SFP_PAGE = 0x7f, + SFP_EXT_STATUS = 118, + SFP_EXT_STATUS_PWRLVL_SELECT = BIT(0), + + SFP_VSL = 120, + SFP_PAGE = 127, }; struct fwnode_handle; diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 08e6054e061f..71310efe2fab 100644 --- 
a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -2,6 +2,9 @@ #ifndef _LINUX_SHRINKER_H #define _LINUX_SHRINKER_H +#include <linux/atomic.h> +#include <linux/types.h> + /* * This struct is used to pass information from page reclaim to the shrinkers. * We consolidate the values for easier extension later. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7be5bb4c94b6..4c8492401a10 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -818,7 +818,7 @@ typedef unsigned char *sk_buff_data_t; * @mark: Generic packet mark * @reserved_tailroom: (aka @mark) number of bytes of free space available * at the tail of an sk_buff - * @vlan_present: VLAN tag is present + * @vlan_all: vlan fields (proto & tci) * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information * @inner_protocol: Protocol (encapsulation) @@ -951,7 +951,7 @@ struct sk_buff { /* private: */ __u8 __pkt_vlan_present_offset[0]; /* public: */ - __u8 vlan_present:1; /* See PKT_VLAN_PRESENT_BIT */ + __u8 remcsum_offload:1; __u8 csum_complete_sw:1; __u8 csum_level:2; __u8 dst_pending_confirm:1; @@ -966,7 +966,6 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; - __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; __u8 offload_l3_fwd_mark:1; @@ -999,8 +998,13 @@ struct sk_buff { __u32 priority; int skb_iif; __u32 hash; - __be16 vlan_proto; - __u16 vlan_tci; + union { + u32 vlan_all; + struct { + __be16 vlan_proto; + __u16 vlan_tci; + }; + }; #if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) union { unsigned int napi_id; @@ -1059,15 +1063,13 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move pkt_vlan_present, tc_at_ingress, or mono_delivery_time +/* if you move tc_at_ingress or mono_delivery_time * around, you also must adapt these constants. 
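A minimal sketch of what the vlan_all union above enables (helper names are made up): because vlan_proto and vlan_tci now overlay a single u32, "tag present" reduces to a non-zero test and whole tags can be copied in one access, which is why the separate vlan_present bit could be dropped.

static inline bool skb_vlan_present_sketch(const struct sk_buff *skb)
{
        return skb->vlan_all != 0;      /* a zero proto never names a valid tag */
}

static inline void skb_vlan_copy_sketch(struct sk_buff *to,
                                        const struct sk_buff *from)
{
        to->vlan_all = from->vlan_all;  /* proto and tci in one store */
}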
*/ #ifdef __BIG_ENDIAN_BITFIELD -#define PKT_VLAN_PRESENT_BIT 7 #define TC_AT_INGRESS_MASK (1 << 0) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 2) #else -#define PKT_VLAN_PRESENT_BIT 0 #define TC_AT_INGRESS_MASK (1 << 7) #define SKB_MONO_DELIVERY_TIME_MASK (1 << 5) #endif @@ -1253,6 +1255,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); +struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer @@ -5050,12 +5053,5 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb) } #endif -static inline bool skb_pp_recycle(struct sk_buff *skb, void *data) -{ - if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) - return false; - return page_pool_return_skb_page(virt_to_page(data)); -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 48f4b645193b..84f787416a54 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -82,6 +82,7 @@ struct sk_psock { u32 apply_bytes; u32 cork_bytes; u32 eval; + bool redir_ingress; /* undefined if sk_redir is null */ struct sk_msg *cork; struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) @@ -376,7 +377,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err) } struct sk_psock *sk_psock_init(struct sock *sk, int node); -void sk_psock_stop(struct sk_psock *psock, bool wait); +void sk_psock_stop(struct sk_psock *psock); #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock); diff --git a/include/linux/slab.h b/include/linux/slab.h index 90877fcde70b..45af70315a94 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -76,6 +76,17 @@ * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * + * Note that it is not possible to acquire a lock within a structure + * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference + * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages + * are not zeroed before being given to the slab, which means that any + * locks must be initialized after each and every kmem_struct_alloc(). + * Alternatively, make the ctor passed to kmem_cache_create() initialize + * the locks at page-allocation time, as is done in __i915_request_ctor(), + * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers + * to safely acquire those ctor-initialized locks under rcu_read_lock() + * protection. + * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. 
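A minimal sketch of the ctor pattern the note above recommends (all names illustrative): the spinlock is initialized when the backing pages enter the cache, not per allocation, so readers may legitimately take it under rcu_read_lock() even while the object is being freed and reallocated.

struct demo_obj {
        spinlock_t lock;
        int refcnt;
};

static void demo_obj_ctor(void *ptr)
{
        struct demo_obj *obj = ptr;

        spin_lock_init(&obj->lock);     /* survives free/realloc cycles */
}

static struct kmem_cache *demo_cache;

static int __init demo_cache_init(void)
{
        demo_cache = kmem_cache_create("demo_obj", sizeof(struct demo_obj),
                                       0, SLAB_TYPESAFE_BY_RCU, demo_obj_ctor);
        return demo_cache ? 0 : -ENOMEM;
}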
*/ /* Defer freeing slabs to RCU */ @@ -129,7 +140,11 @@ /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ +#ifndef CONFIG_SLUB_TINY #define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) +#else +#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0) +#endif #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* @@ -336,13 +351,18 @@ enum kmalloc_cache_type { #endif #ifndef CONFIG_MEMCG_KMEM KMALLOC_CGROUP = KMALLOC_NORMAL, -#else - KMALLOC_CGROUP, #endif +#ifdef CONFIG_SLUB_TINY + KMALLOC_RECLAIM = KMALLOC_NORMAL, +#else KMALLOC_RECLAIM, +#endif #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, #endif +#ifdef CONFIG_MEMCG_KMEM + KMALLOC_CGROUP, +#endif NR_KMALLOC_TYPES }; @@ -441,7 +461,18 @@ static_assert(PAGE_SHIFT <= 20); #endif /* !CONFIG_SLOB */ void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc; + +/** + * kmem_cache_alloc - Allocate an object + * @cachep: The cache to allocate from. + * @flags: See kmalloc(). + * + * Allocate an object from this cache. + * See kmem_cache_zalloc() for a shortcut of adding __GFP_ZERO to flags. + * + * Return: pointer to the new object or %NULL in case of error + */ +void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) __assume_slab_alignment __malloc; void *kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru, gfp_t gfpflags) __assume_slab_alignment __malloc; void kmem_cache_free(struct kmem_cache *s, void *objp); @@ -470,35 +501,12 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignm void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; -#ifdef CONFIG_TRACING void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) __assume_kmalloc_alignment __alloc_size(3); void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) __assume_kmalloc_alignment __alloc_size(4); -#else /* CONFIG_TRACING */ -/* Save a function call when CONFIG_TRACING=n */ -static __always_inline __alloc_size(3) -void *kmalloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) -{ - void *ret = kmem_cache_alloc(s, flags); - - ret = kasan_kmalloc(s, ret, size, flags); - return ret; -} - -static __always_inline __alloc_size(4) -void *kmalloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) -{ - void *ret = kmem_cache_alloc_node(s, gfpflags, node); - - ret = kasan_kmalloc(s, ret, size, gfpflags); - return ret; -} -#endif /* CONFIG_TRACING */ - void *kmalloc_large(size_t size, gfp_t flags) __assume_page_alignment __alloc_size(1); @@ -506,9 +514,9 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align __alloc_size(1); /** - * kmalloc - allocate memory + * kmalloc - allocate kernel memory * @size: how many bytes of memory are required. - * @flags: the type of memory to allocate. + * @flags: describe the allocation context * * kmalloc is the normal method of allocating memory * for objects smaller than page size in the kernel. @@ -535,12 +543,12 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * %GFP_ATOMIC * Allocation will not sleep. May use emergency pools. * - * %GFP_HIGHUSER - * Allocate memory from high memory on behalf of user. 
- * * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * + * %__GFP_ZERO + * Zero the allocated memory before returning. Also see kzalloc(). + * * %__GFP_HIGH * This allocation has high priority and may use emergency pools. * @@ -559,42 +567,42 @@ void *kmalloc_large_node(size_t size, gfp_t flags, int node) __assume_page_align * Try really hard to succeed the allocation but fail * eventually. */ +#ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) { - if (__builtin_constant_p(size)) { -#ifndef CONFIG_SLOB + if (__builtin_constant_p(size) && size) { unsigned int index; -#endif + if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); -#ifndef CONFIG_SLOB - index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; + index = kmalloc_index(size); return kmalloc_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, size); -#endif } return __kmalloc(size, flags); } +#else +static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags) +{ + if (__builtin_constant_p(size) && size > KMALLOC_MAX_CACHE_SIZE) + return kmalloc_large(size, flags); + + return __kmalloc(size, flags); +} +#endif #ifndef CONFIG_SLOB static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node) { - if (__builtin_constant_p(size)) { + if (__builtin_constant_p(size) && size) { unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large_node(size, flags, node); index = kmalloc_index(size); - - if (!index) - return ZERO_SIZE_PTR; - return kmalloc_node_trace( kmalloc_caches[kmalloc_type(flags)][index], flags, node, size); diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index f0ffad6a3365..5834bad8ad78 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -80,8 +80,10 @@ struct kmem_cache { unsigned int *random_seq; #endif +#ifdef CONFIG_HARDENED_USERCOPY unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f9c68a9dac04..aa0ee1678d29 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -41,6 +41,7 @@ enum stat_item { CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */ NR_SLUB_STAT_ITEMS }; +#ifndef CONFIG_SLUB_TINY /* * When changing the layout, make sure freelist and tid are still compatible * with this_cpu_cmpxchg_double() alignment requirements. @@ -57,6 +58,7 @@ struct kmem_cache_cpu { unsigned stat[NR_SLUB_STAT_ITEMS]; #endif }; +#endif /* CONFIG_SLUB_TINY */ #ifdef CONFIG_SLUB_CPU_PARTIAL #define slub_percpu_partial(c) ((c)->partial) @@ -88,7 +90,9 @@ struct kmem_cache_order_objects { * Slab cache management. */ struct kmem_cache { +#ifndef CONFIG_SLUB_TINY struct kmem_cache_cpu __percpu *cpu_slab; +#endif /* Used for retrieving partial slabs, etc. 
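Illustrative call site (the struct is made up): with the rewritten inline above, a compile-time-constant size is folded straight to kmalloc_trace() on a fixed kmalloc cache, oversized constants go to kmalloc_large(), and the newly documented __GFP_ZERO returns pre-zeroed memory, as kzalloc() does.

struct demo_hdr {
        u32 id;
        u64 payload;
};

static struct demo_hdr *demo_hdr_alloc(gfp_t gfp)
{
        /* constant size: cache index resolved at compile time */
        return kmalloc(sizeof(struct demo_hdr), gfp | __GFP_ZERO);
}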
*/ slab_flags_t flags; unsigned long min_partial; @@ -136,13 +140,15 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif +#ifdef CONFIG_HARDENED_USERCOPY unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ +#endif struct kmem_cache_node *node[MAX_NUMNODES]; }; -#ifdef CONFIG_SYSFS +#if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY) #define SLAB_SUPPORTS_SYSFS void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); diff --git a/include/linux/smc911x.h b/include/linux/smc911x.h deleted file mode 100644 index 8cace8189e74..000000000000 --- a/include/linux/smc911x.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __SMC911X_H__ -#define __SMC911X_H__ - -#define SMC911X_USE_16BIT (1 << 0) -#define SMC911X_USE_32BIT (1 << 1) - -struct smc911x_platdata { - unsigned long flags; - unsigned long irq_flags; /* IRQF_... */ - int irq_polarity; -}; - -#endif /* __SMC911X_H__ */ diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index d2b02bb43768..b85f66db33e1 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -9,6 +9,13 @@ enum mtk_ddp_comp_id; struct device; +enum mtk_dpi_out_format_con { + MTK_DPI_RGB888_SDR_CON, + MTK_DPI_RGB888_DDR_CON, + MTK_DPI_RGB565_SDR_CON, + MTK_DPI_RGB565_DDR_CON +}; + enum mtk_ddp_comp_id { DDP_COMPONENT_AAL0, DDP_COMPONENT_AAL1, diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index 4450c8b7a1cb..a0746d4aec20 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -5,27 +5,76 @@ #include <linux/rcupdate.h> #include <linux/regmap.h> #include <linux/pci.h> +#include <linux/skbuff.h> #define MTK_WED_TX_QUEUES 2 +#define MTK_WED_RX_QUEUES 2 + +#define WED_WO_STA_REC 0x6 struct mtk_wed_hw; struct mtk_wdma_desc; +enum mtk_wed_wo_cmd { + MTK_WED_WO_CMD_WED_CFG, + MTK_WED_WO_CMD_WED_RX_STAT, + MTK_WED_WO_CMD_RRO_SER, + MTK_WED_WO_CMD_DBG_INFO, + MTK_WED_WO_CMD_DEV_INFO, + MTK_WED_WO_CMD_BSS_INFO, + MTK_WED_WO_CMD_STA_REC, + MTK_WED_WO_CMD_DEV_INFO_DUMP, + MTK_WED_WO_CMD_BSS_INFO_DUMP, + MTK_WED_WO_CMD_STA_REC_DUMP, + MTK_WED_WO_CMD_BA_INFO_DUMP, + MTK_WED_WO_CMD_FBCMD_Q_DUMP, + MTK_WED_WO_CMD_FW_LOG_CTRL, + MTK_WED_WO_CMD_LOG_FLUSH, + MTK_WED_WO_CMD_CHANGE_STATE, + MTK_WED_WO_CMD_CPU_STATS_ENABLE, + MTK_WED_WO_CMD_CPU_STATS_DUMP, + MTK_WED_WO_CMD_EXCEPTION_INIT, + MTK_WED_WO_CMD_PROF_CTRL, + MTK_WED_WO_CMD_STA_BA_DUMP, + MTK_WED_WO_CMD_BA_CTRL_DUMP, + MTK_WED_WO_CMD_RXCNT_CTRL, + MTK_WED_WO_CMD_RXCNT_INFO, + MTK_WED_WO_CMD_SET_CAP, + MTK_WED_WO_CMD_CCIF_RING_DUMP, + MTK_WED_WO_CMD_WED_END +}; + +struct mtk_rxbm_desc { + __le32 buf0; + __le32 token; +} __packed __aligned(4); + enum mtk_wed_bus_tye { MTK_WED_BUS_PCIE, MTK_WED_BUS_AXI, }; +#define MTK_WED_RING_CONFIGURED BIT(0) struct mtk_wed_ring { struct mtk_wdma_desc *desc; dma_addr_t desc_phys; u32 desc_size; int size; + u32 flags; u32 reg_base; void __iomem *wpdma; }; +struct mtk_wed_wo_rx_stats { + __le16 wlan_idx; + __le16 tid; + __le32 rx_pkt_cnt; + __le32 rx_byte_cnt; + __le32 rx_err_cnt; + __le32 rx_drop_cnt; +}; + struct mtk_wed_device { #ifdef CONFIG_NET_MEDIATEK_SOC_WED const struct mtk_wed_ops *ops; @@ -34,17 +83,36 @@ struct mtk_wed_device { bool init_done, running; int wdma_idx; int irq; + u8 version; + + /* used by wlan driver */ + u32 rev_id; struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring 
rx_ring[MTK_WED_RX_QUEUES]; struct mtk_wed_ring txfree_ring; struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES]; + struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES]; struct { int size; void **pages; struct mtk_wdma_desc *desc; dma_addr_t desc_phys; - } buf_ring; + } tx_buf_ring; + + struct { + int size; + struct page_frag_cache rx_page; + struct mtk_rxbm_desc *desc; + dma_addr_t desc_phys; + } rx_buf_ring; + + struct { + struct mtk_wed_ring ring; + dma_addr_t miod_phys; + dma_addr_t fdbk_phys; + } rro; /* filled by driver: */ struct { @@ -53,22 +121,36 @@ struct mtk_wed_device { struct pci_dev *pci_dev; }; enum mtk_wed_bus_tye bus_type; + void __iomem *base; + u32 phy_base; u32 wpdma_phys; u32 wpdma_int; u32 wpdma_mask; u32 wpdma_tx; u32 wpdma_txfree; + u32 wpdma_rx_glo; + u32 wpdma_rx; + + bool wcid_512; u16 token_start; unsigned int nbuf; + unsigned int rx_nbuf; + unsigned int rx_npkt; + unsigned int rx_size; u8 tx_tbit[MTK_WED_TX_QUEUES]; + u8 rx_tbit[MTK_WED_RX_QUEUES]; u8 txfree_tbit; u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id); int (*offload_enable)(struct mtk_wed_device *wed); void (*offload_disable)(struct mtk_wed_device *wed); + u32 (*init_rx_buf)(struct mtk_wed_device *wed, int size); + void (*release_rx_buf)(struct mtk_wed_device *wed); + void (*update_wo_rx_stats)(struct mtk_wed_device *wed, + struct mtk_wed_wo_rx_stats *stats); } wlan; #endif }; @@ -76,10 +158,16 @@ struct mtk_wed_device { struct mtk_wed_ops { int (*attach)(struct mtk_wed_device *dev); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, - void __iomem *regs); + void __iomem *regs, bool reset); + int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, + void __iomem *regs, bool reset); int (*txfree_ring_setup)(struct mtk_wed_device *dev, void __iomem *regs); + int (*msg_update)(struct mtk_wed_device *dev, int cmd_id, + void *data, int len); void (*detach)(struct mtk_wed_device *dev); + void (*ppe_check)(struct mtk_wed_device *dev, struct sk_buff *skb, + u32 reason, u32 hash); void (*stop)(struct mtk_wed_device *dev); void (*start)(struct mtk_wed_device *dev, u32 irq_mask); @@ -114,12 +202,22 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) return ret; } +static inline bool +mtk_wed_get_rx_capa(struct mtk_wed_device *dev) +{ +#ifdef CONFIG_NET_MEDIATEK_SOC_WED + return dev->version != 1; +#else + return false; +#endif +} + #ifdef CONFIG_NET_MEDIATEK_SOC_WED #define mtk_wed_device_active(_dev) !!(_dev)->ops #define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev) #define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask) -#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \ - (_dev)->ops->tx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) \ + (_dev)->ops->tx_ring_setup(_dev, _ring, _regs, _reset) #define mtk_wed_device_txfree_ring_setup(_dev, _regs) \ (_dev)->ops->txfree_ring_setup(_dev, _regs) #define mtk_wed_device_reg_read(_dev, _reg) \ @@ -130,6 +228,14 @@ mtk_wed_device_attach(struct mtk_wed_device *dev) (_dev)->ops->irq_get(_dev, _mask) #define mtk_wed_device_irq_set_mask(_dev, _mask) \ (_dev)->ops->irq_set_mask(_dev, _mask) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) \ + (_dev)->ops->rx_ring_setup(_dev, _ring, _regs, _reset) +#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \ + (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \ + (_dev)->ops->msg_update(_dev, _id, _msg, _len) +#define mtk_wed_device_stop(_dev) 
(_dev)->ops->stop(_dev) +#define mtk_wed_device_dma_reset(_dev) (_dev)->ops->reset_dma(_dev) #else static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) { @@ -137,12 +243,17 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) } #define mtk_wed_device_detach(_dev) do {} while (0) #define mtk_wed_device_start(_dev, _mask) do {} while (0) -#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV #define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV #define mtk_wed_device_reg_read(_dev, _reg) 0 #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) #define mtk_wed_device_irq_get(_dev, _mask) 0 #define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV +#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0) +#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV +#define mtk_wed_device_stop(_dev) do {} while (0) +#define mtk_wed_device_dma_reset(_dev) do {} while (0) #endif #endif diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index bc2fb8343a94..ad1fd718169d 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -42,7 +42,19 @@ #define LLCC_CPUHWT 36 #define LLCC_MDMCLAD2 37 #define LLCC_CAMEXP1 38 +#define LLCC_CMPTHCP 39 +#define LLCC_LCPDARE 40 #define LLCC_AENPU 45 +#define LLCC_ISLAND1 46 +#define LLCC_ISLAND2 47 +#define LLCC_ISLAND3 48 +#define LLCC_ISLAND4 49 +#define LLCC_CAMEXP2 50 +#define LLCC_CAMEXP3 51 +#define LLCC_CAMEXP4 52 +#define LLCC_DISP_WB 53 +#define LLCC_DISP_1 54 +#define LLCC_VIDVSP 64 /** * struct llcc_slice_desc - Cache slice descriptor diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 3ab8c07f71c0..62de54992e49 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -19,6 +19,7 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_CLK_BUF_A 0x616B6C63 #define QCOM_SMD_RPM_LDOA 0x616f646c #define QCOM_SMD_RPM_LDOB 0x626F646C +#define QCOM_SMD_RPM_LDOE 0x656f646c #define QCOM_SMD_RPM_RWCX 0x78637772 #define QCOM_SMD_RPM_RWMX 0x786d7772 #define QCOM_SMD_RPM_RWLC 0x636c7772 @@ -32,6 +33,7 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_QUP_CLK 0x707571 #define QCOM_SMD_RPM_SMPA 0x61706d73 #define QCOM_SMD_RPM_SMPB 0x62706d73 +#define QCOM_SMD_RPM_SMPE 0x65706d73 #define QCOM_SMD_RPM_SPDM 0x63707362 #define QCOM_SMD_RPM_VSA 0x00617376 #define QCOM_SMD_RPM_MMAXI_CLK 0x69786d6d diff --git a/include/linux/socket.h b/include/linux/socket.h index de3701a2a212..13c3a237b9c9 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -33,7 +33,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 2e9fd91572d4..d2f581feed67 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -286,8 +286,6 @@ int sdw_intel_startup(struct sdw_intel_ctx *ctx); void sdw_intel_exit(struct sdw_intel_ctx *ctx); -void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable); - irqreturn_t sdw_intel_thread(int irq, void 
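A sketch of what the sockaddr union above is for (caller and sizes are hypothetical): protocol addresses longer than the historical 14 bytes can now be copied through the flexible sa_data member without tripping fortified memcpy() bounds checks, while sa_data_min keeps sizeof(struct sockaddr) unchanged for ABI.

static void sockaddr_copy_sketch(struct sockaddr *dst,
                                 const void *addr, size_t len)
{
        /* dst must really point at a buffer of at least len + 2 bytes */
        memcpy(dst->sa_data, addr, len);
}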
*dev_id); #define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index fbf8c0d95968..9a32495fbb1f 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -356,6 +356,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @max_speed_hz: Highest supported transfer speed * @flags: other constraints relevant to this driver * @slave: indicates that this is an SPI slave controller + * @target: indicates that this is an SPI target controller * @devm_allocated: whether the allocation of this struct is devres-managed * @max_transfer_size: function that returns the max transfer size for * a &spi_device; may be %NULL, so the default %SIZE_MAX will be used. @@ -440,6 +441,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @mem_caps: controller capabilities for the handling of memory operations. * @unprepare_message: undo any work done by prepare_message(). * @slave_abort: abort the ongoing transfer request on an SPI slave controller + * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS * number. Any individual value may be NULL for CS lines that * are not GPIOs (driven by the SPI controller itself). @@ -535,8 +537,12 @@ struct spi_controller { /* Flag indicating if the allocation of this struct is devres-managed */ bool devm_allocated; - /* Flag indicating this is an SPI slave controller */ - bool slave; + union { + /* Flag indicating this is an SPI slave controller */ + bool slave; + /* Flag indicating this is an SPI target controller */ + bool target; + }; /* * on some hardware transfer / message size may be constrained @@ -649,7 +655,10 @@ struct spi_controller { struct spi_message *message); int (*unprepare_message)(struct spi_controller *ctlr, struct spi_message *message); - int (*slave_abort)(struct spi_controller *ctlr); + union { + int (*slave_abort)(struct spi_controller *ctlr); + int (*target_abort)(struct spi_controller *ctlr); + }; /* * These hooks are for drivers that use a generic implementation @@ -727,6 +736,11 @@ static inline bool spi_controller_is_slave(struct spi_controller *ctlr) return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->slave; } +static inline bool spi_controller_is_target(struct spi_controller *ctlr) +{ + return IS_ENABLED(CONFIG_SPI_SLAVE) && ctlr->target; +} + /* PM calls that need to be issued by the driver */ extern int spi_controller_suspend(struct spi_controller *ctlr); extern int spi_controller_resume(struct spi_controller *ctlr); @@ -763,6 +777,21 @@ static inline struct spi_controller *spi_alloc_slave(struct device *host, return __spi_alloc_controller(host, size, true); } +static inline struct spi_controller *spi_alloc_host(struct device *dev, + unsigned int size) +{ + return __spi_alloc_controller(dev, size, false); +} + +static inline struct spi_controller *spi_alloc_target(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __spi_alloc_controller(dev, size, true); +} + struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, bool slave); @@ -782,6 +811,21 @@ static inline struct spi_controller *devm_spi_alloc_slave(struct device *dev, return __devm_spi_alloc_controller(dev, size, true); } +static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, + unsigned int size) +{ + return __devm_spi_alloc_controller(dev, size, 
false); +} + +static inline struct spi_controller *devm_spi_alloc_target(struct device *dev, + unsigned int size) +{ + if (!IS_ENABLED(CONFIG_SPI_SLAVE)) + return NULL; + + return __devm_spi_alloc_controller(dev, size, true); +} + extern int spi_register_controller(struct spi_controller *ctlr); extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); @@ -1141,6 +1185,7 @@ static inline void spi_message_free(struct spi_message *m) extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_slave_abort(struct spi_device *spi); +extern int spi_target_abort(struct spi_device *spi); static inline size_t spi_max_message_size(struct spi_device *spi) @@ -1514,6 +1559,9 @@ extern void spi_unregister_device(struct spi_device *spi); extern const struct spi_device_id * spi_get_device_id(const struct spi_device *sdev); +extern const void * +spi_get_device_match_data(const struct spi_device *sdev); + static inline bool spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer) { diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d960a..9b9d0bbf1d3c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -47,11 +47,8 @@ int init_srcu_struct(struct srcu_struct *ssp); #include <linux/srcutiny.h> #elif defined(CONFIG_TREE_SRCU) #include <linux/srcutree.h> -#elif defined(CONFIG_SRCU) -#error "Unknown SRCU implementation specified to kernel configuration" #else -/* Dummy definition for things like notifiers. Actual use gets link error. */ -struct srcu_struct { }; +#error "Unknown SRCU implementation specified to kernel configuration" #endif void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, @@ -64,11 +61,21 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); -#ifdef CONFIG_SRCU +#ifdef CONFIG_NEED_SRCU_NMI_SAFE +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); +#else +static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +{ + return __srcu_read_lock(ssp); +} +static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +{ + __srcu_read_unlock(ssp, idx); +} +#endif /* CONFIG_NEED_SRCU_NMI_SAFE */ + void srcu_init(void); -#else /* #ifdef CONFIG_SRCU */ -static inline void srcu_init(void) { } -#endif /* #else #ifdef CONFIG_SRCU */ #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -104,6 +111,18 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#define SRCU_NMI_UNKNOWN 0x0 +#define SRCU_NMI_UNSAFE 0x1 +#define SRCU_NMI_SAFE 0x2 + +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) +void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe); +#else +static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, + bool nmi_safe) { } +#endif + + /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing @@ -161,17 +180,36 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); rcu_lock_acquire(&(ssp)->dep_map); return retval; } +/** + * srcu_read_lock_nmisafe - register a new reader for an 
SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but in an NMI-safe manner. + * See srcu_read_lock() for more information. + */ +static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) +{ + int retval; + + srcu_check_nmi_safety(ssp, true); + retval = __srcu_read_lock_nmisafe(ssp); + rcu_lock_acquire(&(ssp)->dep_map); + return retval; +} + /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); return retval; } @@ -187,14 +225,32 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, false); rcu_lock_release(&(ssp)->dep_map); __srcu_read_unlock(ssp, idx); } +/** + * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Exit an SRCU read-side critical section, but in an NMI-safe manner. + */ +static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, true); + rcu_lock_release(&(ssp)->dep_map); + __srcu_read_unlock_nmisafe(ssp, idx); +} + /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { + srcu_check_nmi_safety(ssp, false); __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index e3014319d1ad..c689a81752c9 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -23,8 +23,9 @@ struct srcu_struct; */ struct srcu_data { /* Read-side state. */ - unsigned long srcu_lock_count[2]; /* Locks per CPU. */ - unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ + atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ + atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ + int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 4c678c4fec58..9c88707d9a0f 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -6,6 +6,25 @@ #include <linux/sched.h> #include <linux/random.h> +/* + * On 64-bit architectures, protect against non-terminated C string overflows + * by zeroing out the first byte of the canary; this leaves 56 bits of entropy. 
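Usage sketch for the NMI-safe reader API above (handler is illustrative): once an srcu_struct is read from NMI context it must use the _nmisafe flavor on every read side, which the new srcu_check_nmi_safety() call enforces under CONFIG_PROVE_RCU.

DEFINE_STATIC_SRCU(demo_srcu);

static void demo_nmi_handler(void)
{
        int idx;

        idx = srcu_read_lock_nmisafe(&demo_srcu);
        /* ... read demo_srcu-protected state ... */
        srcu_read_unlock_nmisafe(&demo_srcu, idx);
}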
+ */ +#ifdef CONFIG_64BIT +# ifdef __LITTLE_ENDIAN +# define CANARY_MASK 0xffffffffffffff00UL +# else /* big endian, 64 bits: */ +# define CANARY_MASK 0x00ffffffffffffffUL +# endif +#else /* 32 bits: */ +# define CANARY_MASK 0xffffffffUL +#endif + +static inline unsigned long get_random_canary(void) +{ + return get_random_long() & CANARY_MASK; +} + #if defined(CONFIG_STACKPROTECTOR) || defined(CONFIG_ARM64_PTR_AUTH) # include <asm/stackprotector.h> #else diff --git a/include/linux/static_call.h b/include/linux/static_call.h index df53bed9d71f..141e6b176a1b 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -162,6 +162,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool extern int __init static_call_init(void); +extern void static_call_force_reinit(void); + struct static_call_mod { struct static_call_mod *next; struct module *mod; /* for vmlinux, mod == NULL */ diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fb2e88614f5d..83ca2e8eb6b5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -271,5 +271,6 @@ struct plat_stmmacenet_data { int msi_tx_base_vec; bool use_phy_wol; bool sph_disable; + bool serdes_up_after_phy_linkup; }; #endif diff --git a/include/linux/string.h b/include/linux/string.h index cf7607b32102..db28802ab0a6 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -176,7 +176,7 @@ extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup(const void *src, size_t len, gfp_t gfp); +extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 88de45491376..ed722dd33b46 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -220,13 +220,6 @@ static inline __be32 svc_getu32(struct kvec *iov) return val; } -static inline void svc_ungetu32(struct kvec *iov) -{ - __be32 *vp = (__be32 *)iov->iov_base; - iov->iov_base = (void *)(vp - 1); - iov->iov_len += sizeof(*vp); -} - static inline void svc_putu32(struct kvec *iov, __be32 val) { __be32 *vp = iov->iov_base + iov->iov_len; @@ -311,7 +304,6 @@ struct svc_rqst { struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ struct svc_cacherep * rq_cacherep; /* cache info */ struct task_struct *rq_task; /* service thread */ - spinlock_t rq_lock; /* per-request lock */ struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ diff --git a/include/linux/swap.h b/include/linux/swap.h index a18cf4b7c724..2787b84eaf12 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -55,22 +55,14 @@ static inline int current_is_kswapd(void) * actions on faults. */ -#define SWP_SWAPIN_ERROR_NUM 1 -#define SWP_SWAPIN_ERROR (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ - SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \ - SWP_PTE_MARKER_NUM) /* - * PTE markers are used to persist information onto PTEs that are mapped with - * file-backed memories. As its name "PTE" hints, it should only be applied to - * the leaves of pgtables. + * PTE markers are used to persist information onto PTEs that otherwise + * should be a none pte. As its name "PTE" hints, it should only be + * applied to the leaves of pgtables. 
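A quick illustration of the masking just defined (not from the patch): on 64-bit little-endian kernels the canary byte that sits lowest in memory is forced to zero, so an overrunning C string copy cannot reproduce the full canary.

static void canary_demo(void)
{
        unsigned long canary = get_random_canary();

#if defined(CONFIG_64BIT) && defined(__LITTLE_ENDIAN)
        WARN_ON(canary & 0xff);         /* acts as a NUL terminator */
#endif
}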
*/ -#ifdef CONFIG_PTE_MARKER #define SWP_PTE_MARKER_NUM 1 #define SWP_PTE_MARKER (MAX_SWAPFILES + SWP_HWPOISON_NUM + \ SWP_MIGRATION_NUM + SWP_DEVICE_NUM) -#else -#define SWP_PTE_MARKER_NUM 0 -#endif /* * Unaddressable device memory support. See include/linux/hmm.h and @@ -125,7 +117,7 @@ static inline int current_is_kswapd(void) #define MAX_SWAPFILES \ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \ - SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM) + SWP_PTE_MARKER_NUM) /* * Magic header for a swap area. The first part of the union is @@ -384,11 +376,11 @@ extern unsigned long totalreserve_pages; /* linux/mm/swap.c */ -void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages); -void lru_note_cost_folio(struct folio *); +void lru_note_cost(struct lruvec *lruvec, bool file, + unsigned int nr_io, unsigned int nr_rotated); +void lru_note_cost_refault(struct folio *); void folio_add_lru(struct folio *); void folio_add_lru_vma(struct folio *, struct vm_area_struct *); -void lru_cache_add(struct page *); void mark_page_accessed(struct page *); void folio_mark_accessed(struct folio *); @@ -426,7 +418,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - unsigned int reclaim_options); + unsigned int reclaim_options, + nodemask_t *nodemask); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, @@ -470,7 +463,7 @@ static inline unsigned long total_swapcache_pages(void) extern void free_swap_cache(struct page *page); extern void free_page_and_swap_cache(struct page *); -extern void free_pages_and_swap_cache(struct page **, int); +extern void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 86b95ccb81bb..b982dd614572 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -33,11 +33,13 @@ * can use the extra bits to store other information besides PFN. */ #ifdef MAX_PHYSMEM_BITS -#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) #else /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT) +#define SWP_PFN_BITS min_t(int, \ + sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \ + SWP_TYPE_SHIFT) #endif /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) +#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) /** * Migration swap entry specific bitfield definitions. 
Layout: @@ -162,16 +164,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) return xa_mk_value(entry.val); } -static inline swp_entry_t make_swapin_error_entry(struct page *page) -{ - return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page)); -} - -static inline int is_swapin_error_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_SWAPIN_ERROR; -} - #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { @@ -409,10 +401,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) typedef unsigned long pte_marker; -#define PTE_MARKER_UFFD_WP BIT(0) -#define PTE_MARKER_MASK (PTE_MARKER_UFFD_WP) - -#ifdef CONFIG_PTE_MARKER +#define PTE_MARKER_UFFD_WP BIT(0) +#define PTE_MARKER_SWAPIN_ERROR BIT(1) +#define PTE_MARKER_MASK (BIT(2) - 1) static inline swp_entry_t make_pte_marker_entry(pte_marker marker) { @@ -434,35 +425,20 @@ static inline bool is_pte_marker(pte_t pte) return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte)); } -#else /* CONFIG_PTE_MARKER */ - -static inline swp_entry_t make_pte_marker_entry(pte_marker marker) -{ - /* This should never be called if !CONFIG_PTE_MARKER */ - WARN_ON_ONCE(1); - return swp_entry(0, 0); -} - -static inline bool is_pte_marker_entry(swp_entry_t entry) -{ - return false; -} - -static inline pte_marker pte_marker_get(swp_entry_t entry) +static inline pte_t make_pte_marker(pte_marker marker) { - return 0; + return swp_entry_to_pte(make_pte_marker_entry(marker)); } -static inline bool is_pte_marker(pte_t pte) +static inline swp_entry_t make_swapin_error_entry(void) { - return false; + return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR); } -#endif /* CONFIG_PTE_MARKER */ - -static inline pte_t make_pte_marker(pte_marker marker) +static inline int is_swapin_error_entry(swp_entry_t entry) { - return swp_entry_to_pte(make_pte_marker_entry(marker)); + return is_pte_marker_entry(entry) && + (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR); } /* @@ -477,9 +453,6 @@ static inline pte_t make_pte_marker(pte_marker marker) * memory, kernel-only memory (including when the system is during-boot), * non-ram based generic file-system. It's fine to be used even there, but the * extra pte marker check will be pure overhead. - * - * For systems configured with !CONFIG_PTE_MARKER this will be automatically - * optimized to pte_none(). 
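A sketch of the consolidated encoding above (helper name is made up): a swapin error is now just a PTE marker with PTE_MARKER_SWAPIN_ERROR set, so a single swap type carries both the uffd-wp and error bits and no dedicated SWP_SWAPIN_ERROR type is needed.

static bool pte_is_swapin_error_sketch(pte_t pte)
{
        swp_entry_t entry;

        if (!is_swap_pte(pte))
                return false;

        entry = pte_to_swp_entry(pte);
        return is_pte_marker_entry(entry) &&
               (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
}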
*/ static inline int pte_none_mostly(pte_t pte) { @@ -581,8 +554,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd) #ifdef CONFIG_MEMORY_FAILURE -extern atomic_long_t num_poisoned_pages __read_mostly; - /* * Support for hardware poisoned pages */ @@ -597,17 +568,7 @@ static inline int is_hwpoison_entry(swp_entry_t entry) return swp_type(entry) == SWP_HWPOISON; } -static inline void num_poisoned_pages_inc(void) -{ - atomic_long_inc(&num_poisoned_pages); -} - -static inline void num_poisoned_pages_sub(long i) -{ - atomic_long_sub(i, &num_poisoned_pages); -} - -#else /* CONFIG_MEMORY_FAILURE */ +#else static inline swp_entry_t make_hwpoison_entry(struct page *page) { @@ -618,15 +579,7 @@ static inline int is_hwpoison_entry(swp_entry_t swp) { return 0; } - -static inline void num_poisoned_pages_inc(void) -{ -} - -static inline void num_poisoned_pages_sub(long i) -{ -} -#endif /* CONFIG_MEMORY_FAILURE */ +#endif static inline int non_swap_entry(swp_entry_t entry) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a34b0f9a9972..33a0ee3bcb2e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -264,6 +264,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) #ifdef CONFIG_COMPAT +#define SYSCALL32_DEFINE0 COMPAT_SYSCALL_DEFINE0 #define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 #define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 #define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 @@ -271,6 +272,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 #define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 #else +#define SYSCALL32_DEFINE0 SYSCALL_DEFINE0 #define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 #define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 #define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 41b1da621a45..ca7f05a130d2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -423,6 +423,7 @@ struct tcp_sock { u32 probe_seq_start; u32 probe_seq_end; } mtu_probe; + u32 plb_rehash; /* PLB-triggered rehash attempts */ u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG * while socket was owned by user. 
*/ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 9ecc128944a1..5e093602e8fc 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -100,6 +100,7 @@ struct thermal_cooling_device_ops { struct thermal_cooling_device { int id; char *type; + unsigned long max_state; struct device device; struct device_node *np; void *devdata; diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 3146f1c056c9..bb9d3f5542f8 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -45,6 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags, void free_time_ns(struct time_namespace *ns); void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); struct vdso_data *arch_get_vdso_data(void *vvar_page); +struct page *find_timens_vvar_page(struct vm_area_struct *vma); static inline void put_time_ns(struct time_namespace *ns) { @@ -141,6 +142,11 @@ static inline void timens_on_fork(struct nsproxy *nsproxy, return; } +static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} + static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { } diff --git a/include/linux/timer.h b/include/linux/timer.h index 648f00105f58..9162f275819a 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -169,7 +169,6 @@ static inline int timer_pending(const struct timer_list * timer) } extern void add_timer_on(struct timer_list *timer, int cpu); -extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int timer_reduce(struct timer_list *timer, unsigned long expires); @@ -183,14 +182,36 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync(struct timer_list *timer); +extern int timer_delete(struct timer_list *timer); +extern int timer_shutdown_sync(struct timer_list *timer); +extern int timer_shutdown(struct timer_list *timer); -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) - extern int del_timer_sync(struct timer_list *timer); -#else -# define del_timer_sync(t) del_timer(t) -#endif +/** + * del_timer_sync - Delete a pending timer and wait for a running callback + * @timer: The timer to be deleted + * + * See timer_delete_sync() for detailed explanation. + * + * Do not use in new code. Use timer_delete_sync() instead. + */ +static inline int del_timer_sync(struct timer_list *timer) +{ + return timer_delete_sync(timer); +} -#define del_singleshot_timer_sync(t) del_timer_sync(t) +/** + * del_timer - Delete a pending timer + * @timer: The timer to be deleted + * + * See timer_delete() for detailed explanation. + * + * Do not use in new code. Use timer_delete() instead. 
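Illustrative teardown using the new names (driver structure is made up): timer_delete_sync() is the drop-in spelling of del_timer_sync(), while timer_shutdown_sync() additionally discards any later re-arm attempt, which is what most destructors actually want.

struct demo_dev {
        struct timer_list poll_timer;
};

static void demo_dev_destroy(struct demo_dev *dd)
{
        /* was: del_timer_sync(&dd->poll_timer); */
        timer_shutdown_sync(&dd->poll_timer);
        /* subsequent mod_timer()/add_timer() on poll_timer are ignored */
}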
+ */ +static inline int del_timer(struct timer_list *timer) +{ + return timer_delete(timer); +} extern void init_timers(void); struct hrtimer; diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h index 93884086f392..adc80e29168e 100644 --- a/include/linux/timerqueue.h +++ b/include/linux/timerqueue.h @@ -35,7 +35,7 @@ struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head) { struct rb_node *leftmost = rb_first_cached(&head->rb_root); - return rb_entry(leftmost, struct timerqueue_node, node); + return rb_entry_safe(leftmost, struct timerqueue_node, node); } static inline void timerqueue_init(struct timerqueue_node *node) diff --git a/include/linux/trace.h b/include/linux/trace.h index b5e16e438448..80ffda871749 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -26,13 +26,13 @@ struct trace_export { int flags; }; +struct trace_array; + #ifdef CONFIG_TRACING int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); -struct trace_array; - void trace_printk_init_buffers(void); __printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, diff --git a/include/linux/udp.h b/include/linux/udp.h index e96da4157d04..a2892e151644 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -23,7 +23,9 @@ static inline struct udphdr *udp_hdr(const struct sk_buff *skb) return (struct udphdr *)skb_transport_header(skb); } +#define UDP_HTABLE_SIZE_MIN_PERNET 128 #define UDP_HTABLE_SIZE_MIN (CONFIG_BASE_SMALL ? 128 : 256) +#define UDP_HTABLE_SIZE_MAX 65536 static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask) { @@ -70,7 +72,8 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); - void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); + void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); @@ -87,6 +90,9 @@ struct udp_sock { /* This field is dirtied by udp_recvmsg() */ int forward_deficit; + + /* This fields follows rcvbuf value, and is touched by udp_recvmsg */ + int forward_threshold; }; #define UDP_MAX_SEGMENTS (1 << 6UL) diff --git a/include/linux/uio.h b/include/linux/uio.h index 2e3134b14ffd..9f158238edba 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -29,6 +29,9 @@ enum iter_type { ITER_UBUF, }; +#define ITER_SOURCE 1 // == WRITE +#define ITER_DEST 0 // == READ + struct iov_iter_state { size_t iov_offset; size_t count; @@ -247,8 +250,14 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); +ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, + size_t maxsize, unsigned maxpages, size_t *start, + unsigned gup_flags); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, size_t *start, + unsigned gup_flags); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); diff --git a/include/linux/usb.h 
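A sketch of the new direction aliases above (function is hypothetical): ITER_SOURCE and ITER_DEST replace the raw WRITE/READ constants when initializing an iov_iter, making it obvious whether data flows out of or into the iterator.

static void demo_iter_init(struct iov_iter *it, struct kvec *vec,
                           size_t len, bool data_flows_out)
{
        /* ITER_SOURCE: data is read from the iter; ITER_DEST: written to it */
        iov_iter_kvec(it, data_flows_out ? ITER_SOURCE : ITER_DEST,
                      vec, 1, len);
}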
b/include/linux/usb.h index 9ff1ad4dfad1..d2d2f41052c0 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1829,6 +1829,7 @@ static inline int usb_get_ptm_status(struct usb_device *dev, void *data) extern int usb_string(struct usb_device *dev, int index, char *buf, size_t size); +extern char *usb_cache_string(struct usb_device *udev, int index); /* wrappers that also update important state inside usbcore */ extern int usb_clear_halt(struct usb_device *dev, int pipe); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7cebeb875dd..a615542df1e0 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -17,6 +17,9 @@ #include <linux/iova_bitmap.h> struct kvm; +struct iommufd_ctx; +struct iommufd_device; +struct iommufd_access; /* * VFIO devices can be placed in a set, this allows all devices to share this @@ -54,6 +57,12 @@ struct vfio_device { struct completion comp; struct list_head group_next; struct list_head iommu_entry; + struct iommufd_access *iommufd_access; +#if IS_ENABLED(CONFIG_IOMMUFD) + struct iommufd_device *iommufd_device; + struct iommufd_ctx *iommufd_ictx; + bool iommufd_attached; +#endif }; /** @@ -80,6 +89,10 @@ struct vfio_device_ops { char *name; int (*init)(struct vfio_device *vdev); void (*release)(struct vfio_device *vdev); + int (*bind_iommufd)(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); + void (*unbind_iommufd)(struct vfio_device *vdev); + int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -96,6 +109,32 @@ struct vfio_device_ops { void __user *arg, size_t argsz); }; +#if IS_ENABLED(CONFIG_IOMMUFD) +int vfio_iommufd_physical_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); +void vfio_iommufd_physical_unbind(struct vfio_device *vdev); +int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +int vfio_iommufd_emulated_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id); +void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); +int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id); +#else +#define vfio_iommufd_physical_bind \ + ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ + u32 *out_device_id)) NULL) +#define vfio_iommufd_physical_unbind \ + ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#define vfio_iommufd_emulated_bind \ + ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ + u32 *out_device_id)) NULL) +#define vfio_iommufd_emulated_unbind \ + ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_emulated_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) +#endif + /** * @migration_set_state: Optional callback to change the migration state for * devices that support migration. 
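A sketch of how a physical VFIO driver might wire up the new callbacks (ops fields beyond these elided, names illustrative): because the CONFIG_IOMMUFD=n stubs above are typed NULL constants, the same initializer compiles either way with no #ifdef at the driver.

static const struct vfio_device_ops demo_vfio_ops = {
        .name           = "demo-vfio",
        .bind_iommufd   = vfio_iommufd_physical_bind,
        .unbind_iommufd = vfio_iommufd_physical_unbind,
        .attach_ioas    = vfio_iommufd_physical_attach_ioas,
        /* .open_device, .close_device, ... as before */
};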
It's mandatory for @@ -189,6 +228,7 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index a960de68ac69..bdf8de2cdd93 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -15,6 +15,7 @@ static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) case VIRTIO_NET_HDR_GSO_TCPV6: return protocol == cpu_to_be16(ETH_P_IPV6); case VIRTIO_NET_HDR_GSO_UDP: + case VIRTIO_NET_HDR_GSO_UDP_L4: return protocol == cpu_to_be16(ETH_P_IP) || protocol == cpu_to_be16(ETH_P_IPV6); default: @@ -31,6 +32,7 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: + case VIRTIO_NET_HDR_GSO_UDP_L4: skb->protocol = cpu_to_be16(ETH_P_IP); break; case VIRTIO_NET_HDR_GSO_TCPV6: @@ -69,6 +71,11 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, ip_proto = IPPROTO_UDP; thlen = sizeof(struct udphdr); break; + case VIRTIO_NET_HDR_GSO_UDP_L4: + gso_type = SKB_GSO_UDP_L4; + ip_proto = IPPROTO_UDP; + thlen = sizeof(struct udphdr); + break; default: return -EINVAL; } @@ -182,6 +189,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP_L4) + hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 3518dba1e02f..7f5d1caf5890 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -40,10 +40,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGREUSE, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, + PGSTEAL_KHUGEPAGED, PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, + PGDEMOTE_KHUGEPAGED, PGSCAN_KSWAPD, PGSCAN_DIRECT, + PGSCAN_KHUGEPAGED, PGSCAN_DIRECT_THROTTLE, PGSCAN_ANON, PGSCAN_FILE, diff --git a/include/linux/wait.h b/include/linux/wait.h index 7f5a51aae0a7..a0307b516b09 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -209,7 +209,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq list_del(&wq_entry->entry); } -void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, unsigned int mode, void *key, wait_queue_entry_t *bookmark); diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h deleted file mode 100644 index 03d61f1d23ab..000000000000 --- a/include/linux/wl12xx.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * This file is part of wl12xx - * - * Copyright (C) 2009 Nokia Corporation - * - * Contact: Luciano Coelho <luciano.coelho@nokia.com> - */ - -#ifndef _LINUX_WL12XX_H -#define _LINUX_WL12XX_H - -#include <linux/err.h> - -struct wl1251_platform_data { - int power_gpio; - /* SDIO only: IRQ number 
if WLAN_IRQ line is used, 0 for SDIO IRQs */ - int irq; - bool use_eeprom; -}; - -#ifdef CONFIG_WILINK_PLATFORM_DATA - -int wl1251_set_platform_data(const struct wl1251_platform_data *data); - -struct wl1251_platform_data *wl1251_get_platform_data(void); - -#else - -static inline -int wl1251_set_platform_data(const struct wl1251_platform_data *data) -{ - return -ENOSYS; -} - -static inline -struct wl1251_platform_data *wl1251_get_platform_data(void) -{ - return ERR_PTR(-ENODATA); -} - -#endif - -#endif diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 5ce2acf444fb..24d76500b1cc 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -15,6 +15,7 @@ * @WWAN_PORT_QMI: Qcom modem/MSM interface for modem control * @WWAN_PORT_QCDM: Qcom Modem diagnostic interface * @WWAN_PORT_FIREHOSE: XML based command protocol + * @WWAN_PORT_XMMRPC: Control protocol for Intel XMM modems * * @WWAN_PORT_MAX: Highest supported port types * @WWAN_PORT_UNKNOWN: Special value to indicate an unknown port type @@ -26,6 +27,7 @@ enum wwan_port_type { WWAN_PORT_QMI, WWAN_PORT_QCDM, WWAN_PORT_FIREHOSE, + WWAN_PORT_XMMRPC, /* Add new port types above this line */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 4c379d23ec6e..2e7dd44926e4 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -22,6 +22,12 @@ struct inode; struct dentry; +static inline bool is_posix_acl_xattr(const char *name) +{ + return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || + (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0); +} + /* * struct xattr_handler: When @name is set, match attributes with exactly that * name. When @prefix is set instead, match attributes with that prefix and @@ -68,9 +74,9 @@ int __vfs_removexattr_locked(struct user_namespace *, struct dentry *, int vfs_removexattr(struct user_namespace *, struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -ssize_t vfs_getxattr_alloc(struct user_namespace *mnt_userns, - struct dentry *dentry, const char *name, - char **xattr_value, size_t size, gfp_t flags); +int vfs_getxattr_alloc(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *name, + char **xattr_value, size_t size, gfp_t flags); int xattr_supported_namespace(struct inode *inode, const char *prefix); @@ -80,48 +86,28 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler) } struct simple_xattrs { - struct list_head head; - spinlock_t lock; + struct rb_root rb_root; + rwlock_t lock; }; struct simple_xattr { - struct list_head list; + struct rb_node rb_node; char *name; size_t size; char value[]; }; -/* - * initialize the simple_xattrs structure - */ -static inline void simple_xattrs_init(struct simple_xattrs *xattrs) -{ - INIT_LIST_HEAD(&xattrs->head); - spin_lock_init(&xattrs->lock); -} - -/* - * free all the xattrs - */ -static inline void simple_xattrs_free(struct simple_xattrs *xattrs) -{ - struct simple_xattr *xattr, *node; - - list_for_each_entry_safe(xattr, node, &xattrs->head, list) { - kfree(xattr->name); - kvfree(xattr); - } -} - +void simple_xattrs_init(struct simple_xattrs *xattrs); +void simple_xattrs_free(struct simple_xattrs *xattrs); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); int simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags, ssize_t *removed_size); -ssize_t 
simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, - size_t size); -void simple_xattr_list_add(struct simple_xattrs *xattrs, - struct simple_xattr *new_xattr); +ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, + char *buffer, size_t size); +void simple_xattr_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr); #endif /* _LINUX_XATTR_H */ diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 2a430e713ce5..a48cd0ffe57d 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -55,5 +55,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle); unsigned long zs_get_total_pages(struct zs_pool *pool); unsigned long zs_compact(struct zs_pool *pool); +unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size); + void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats); #endif
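Usage sketch for the reworked simple xattr API above (filesystem glue is hypothetical): initialization and teardown are now out of line, and the backing store is an rbtree under an rwlock rather than a spinlocked list, but callers still go through the same set/get entry points.

static int demo_set_user_xattr(struct simple_xattrs *xattrs,
                               const void *value, size_t size)
{
        return simple_xattr_set(xattrs, "user.demo", value, size,
                                XATTR_CREATE, NULL);
}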