diff options
Diffstat (limited to 'include/linux')
718 files changed, 22252 insertions, 8867 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6adcd1b92b20..71e692f95290 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -330,14 +330,16 @@ static inline bool acpi_sci_irq_valid(void) } extern int sbf_port; -extern unsigned long acpi_realmode_flags; int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); +typedef struct fwnode_handle *(*acpi_gsi_domain_disp_fn)(u32); + void acpi_set_irq_model(enum acpi_irq_model_id model, - struct fwnode_handle *(*)(u32)); + acpi_gsi_domain_disp_fn fn); +acpi_gsi_domain_disp_fn acpi_get_gsi_dispatcher(void); void acpi_set_gsi_to_irq_fallback(u32 (*)(u32)); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, @@ -773,6 +775,10 @@ int acpi_get_local_u64_address(acpi_handle handle, u64 *addr); int acpi_get_local_address(acpi_handle handle, u32 *addr); const char *acpi_get_subsystem_id(acpi_handle handle); +#ifdef CONFIG_ACPI_MRRM +int acpi_mrrm_max_mem_region(void); +#endif + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 @@ -854,6 +860,11 @@ static inline struct fwnode_handle *acpi_fwnode_handle(struct acpi_device *adev) return NULL; } +static inline acpi_handle acpi_device_handle(struct acpi_device *adev) +{ + return NULL; +} + static inline bool has_acpi_companion(struct device *dev) { return false; @@ -1088,8 +1099,24 @@ static inline acpi_handle acpi_get_processor_handle(int cpu) return NULL; } +static inline int acpi_mrrm_max_mem_region(void) +{ + return 1; +} + #endif /* !CONFIG_ACPI */ +#ifdef CONFIG_ACPI_HMAT +int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid, + resource_size_t *size); +#else +static inline int hmat_get_extended_linear_cache_size(struct resource *backing_res, + int nid, resource_size_t *size) +{ + return -EOPNOTSUPP; +} +#endif + extern void arch_post_acpi_subsys_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC @@ -1110,13 +1137,13 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); -#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); void (*check)(void); void (*restore)(void); }; +#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); int acpi_get_lps0_constraint(struct acpi_device *adev); @@ -1125,6 +1152,13 @@ static inline int acpi_get_lps0_constraint(struct device *dev) { return ACPI_STATE_UNKNOWN; } +static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ + return -ENODEV; +} +static inline void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ +} #endif /* CONFIG_SUSPEND && CONFIG_X86 */ void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else @@ -1469,7 +1503,7 @@ int acpi_parse_spcr(bool enable_earlycon, bool enable_console); #else static inline int acpi_parse_spcr(bool enable_earlycon, bool enable_console) { - return 0; + return -ENODEV; } #endif diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h index c637e0997f6d..d83c9175828f 100644 --- a/include/linux/adreno-smmu-priv.h +++ b/include/linux/adreno-smmu-priv.h @@ -45,11 +45,16 @@ struct adreno_smmu_fault_info { * TTBR0 translation is enabled with the specified cfg * @get_fault_info: Called by the GPU fault handler to get information about * the fault - * @set_stall: Configure whether stall on fault (CFCFG) is enabled. Call - * before set_ttbr0_cfg(). If stalling on fault is enabled, - * the GPU driver must call resume_translation() + * @set_stall: Configure whether stall on fault (CFCFG) is enabled. If + * stalling on fault is enabled, the GPU driver must call + * resume_translation() * @resume_translation: Resume translation after a fault * + * @set_prr_bit: [optional] Configure the GPU's Partially Resident + * Region (PRR) bit in the ACTLR register. + * @set_prr_addr: [optional] Configure the PRR_CFG_*ADDR register with + * the physical address of PRR page passed from GPU + * driver. * * The GPU driver (drm/msm) and adreno-smmu work together for controlling * the GPU's SMMU instance. This is by necessity, as the GPU is directly @@ -67,6 +72,8 @@ struct adreno_smmu_priv { void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info); void (*set_stall)(const void *cookie, bool enabled); void (*resume_translation)(const void *cookie, bool terminate); + void (*set_prr_bit)(const void *cookie, bool set); + void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr); }; #endif /* __ADRENO_SMMU_PRIV_H */ diff --git a/include/linux/aer.h b/include/linux/aer.h index 4b97f38f3fcf..02940be66324 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -16,10 +16,26 @@ #define AER_CORRECTABLE 2 #define DPC_FATAL 3 +/* + * AER and DPC capabilities TLP Logging register sizes (PCIe r6.2, sec 7.8.4 + * & 7.9.14). + */ +#define PCIE_STD_NUM_TLP_HEADERLOG 4 +#define PCIE_STD_MAX_TLP_PREFIXLOG 4 +#define PCIE_STD_MAX_TLP_HEADERLOG (PCIE_STD_NUM_TLP_HEADERLOG + 10) + struct pci_dev; struct pcie_tlp_log { - u32 dw[4]; + union { + u32 dw[PCIE_STD_MAX_TLP_HEADERLOG]; + struct { + u32 _do_not_use[PCIE_STD_NUM_TLP_HEADERLOG]; + u32 prefix[PCIE_STD_MAX_TLP_PREFIXLOG]; + }; + }; + u8 header_len; /* Length of the Logged TLP Header in DWORDs */ + bool flit; /* TLP was logged when in Flit mode */ }; struct aer_capability_regs { @@ -37,8 +53,6 @@ struct aer_capability_regs { u16 uncor_err_source; }; -int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log *log); - #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); diff --git a/include/linux/align.h b/include/linux/align.h index 2b4acec7b95a..55debf105a5d 100644 --- a/include/linux/align.h +++ b/include/linux/align.h @@ -2,14 +2,6 @@ #ifndef _LINUX_ALIGN_H #define _LINUX_ALIGN_H -#include <linux/const.h> - -/* @a is a power of 2 value */ -#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) -#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) -#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) -#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +#include <vdso/align.h> #endif /* _LINUX_ALIGN_H */ diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index a946e0203e6d..8f7931eb7d16 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #else /* ARCH_NEEDS_WEAK_PER_CPU */ +#ifdef MODULE + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section(ALLOC_TAG_SECTION_NAME) = { \ + .ct = CODE_TAG_INIT, \ + .counters = NULL }; + +#else /* MODULE */ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ @@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; +#endif /* MODULE */ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 2b90c48a6a87..062fbd4c9b77 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -31,11 +31,11 @@ struct amd_iommu_pi_data { struct task_struct; struct pci_dev; -extern int amd_iommu_detect(void); +extern void amd_iommu_detect(void); #else /* CONFIG_AMD_IOMMU */ -static inline int amd_iommu_detect(void) { return -ENODEV; } +static inline void amd_iommu_detect(void) { } #endif /* CONFIG_AMD_IOMMU */ diff --git a/include/linux/amd-pmf-io.h b/include/linux/amd-pmf-io.h index b4f818205216..6fa510f419c0 100644 --- a/include/linux/amd-pmf-io.h +++ b/include/linux/amd-pmf-io.h @@ -18,10 +18,12 @@ * enum sfh_message_type - Query the SFH message type * @MT_HPD: Message ID to know the Human presence info from MP2 FW * @MT_ALS: Message ID to know the Ambient light info from MP2 FW + * @MT_SRA: Message ID to know the SRA data from MP2 FW */ enum sfh_message_type { MT_HPD, MT_ALS, + MT_SRA, }; /** @@ -40,10 +42,23 @@ enum sfh_hpd_info { * struct amd_sfh_info - get HPD sensor info from MP2 FW * @ambient_light: Populates the ambient light information * @user_present: Populates the user presence information + * @platform_type: Operating modes (clamshell, flat, tent, etc.) + * @laptop_placement: Device states (ontable, onlap, outbag) */ struct amd_sfh_info { u32 ambient_light; u8 user_present; + u32 platform_type; + u32 laptop_placement; +}; + +enum laptop_placement { + LP_UNKNOWN = 0, + ON_TABLE, + ON_LAP_MOTION, + IN_BAG, + OUT_OF_BAG, + LP_UNDEFINED, }; int amd_get_sfh_info(struct amd_sfh_info *sfh_info, enum sfh_message_type op); diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 2222e8b03ff4..d72d6e5aa200 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -14,14 +14,6 @@ int topology_update_cpu_topology(void); struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); -DECLARE_PER_CPU(unsigned long, cpu_scale); - -static inline unsigned long topology_get_cpu_scale(int cpu) -{ - return per_cpu(cpu_scale, cpu); -} - -void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); DECLARE_PER_CPU(unsigned long, capacity_freq_ref); diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 67f6fdf2e7cd..50b47eba7d01 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -7,6 +7,11 @@ #include <linux/args.h> #include <linux/init.h> + +#ifndef __ASSEMBLY__ +#include <linux/uuid.h> +#endif + #include <uapi/linux/const.h> /* @@ -107,10 +112,10 @@ ARM_SMCCC_FUNC_QUERY_CALL_UID) /* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM UUID_INIT(\ + 0x28b46fb6, 0x2ec5, 0x11e9, \ + 0xa9, 0xca, 0x4b, 0x56, \ + 0x4d, 0x00, 0x3a, 0x74) /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 @@ -179,6 +184,9 @@ #define ARM_SMCCC_KVM_FUNC_PKVM_RESV_62 62 #define ARM_SMCCC_KVM_FUNC_PKVM_RESV_63 63 /* End of pKVM hypercall range */ +#define ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_VER 64 +#define ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_CPUS 65 + #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -225,6 +233,18 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_MMIO_GUARD) +#define ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_VER_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_VER) + +#define ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_DISCOVER_IMPL_CPUS) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 @@ -333,6 +353,57 @@ s32 arm_smccc_get_soc_id_version(void); */ s32 arm_smccc_get_soc_id_revision(void); +#ifndef __ASSEMBLY__ + +/* + * Returns whether a specific hypervisor UUID is advertised for the + * Vendor Specific Hypervisor Service range. + */ +bool arm_smccc_hypervisor_has_uuid(const uuid_t *uuid); + +static inline uuid_t smccc_res_to_uuid(u32 r0, u32 r1, u32 r2, u32 r3) +{ + uuid_t uuid = { + .b = { + [0] = (r0 >> 0) & 0xff, + [1] = (r0 >> 8) & 0xff, + [2] = (r0 >> 16) & 0xff, + [3] = (r0 >> 24) & 0xff, + + [4] = (r1 >> 0) & 0xff, + [5] = (r1 >> 8) & 0xff, + [6] = (r1 >> 16) & 0xff, + [7] = (r1 >> 24) & 0xff, + + [8] = (r2 >> 0) & 0xff, + [9] = (r2 >> 8) & 0xff, + [10] = (r2 >> 16) & 0xff, + [11] = (r2 >> 24) & 0xff, + + [12] = (r3 >> 0) & 0xff, + [13] = (r3 >> 8) & 0xff, + [14] = (r3 >> 16) & 0xff, + [15] = (r3 >> 24) & 0xff, + }, + }; + + return uuid; +} + +static inline u32 smccc_uuid_to_reg(const uuid_t *uuid, int reg) +{ + u32 val = 0; + + val |= (u32)(uuid->b[4 * reg + 0] << 0); + val |= (u32)(uuid->b[4 * reg + 1] << 8); + val |= (u32)(uuid->b[4 * reg + 2] << 16); + val |= (u32)(uuid->b[4 * reg + 3] << 24); + + return val; +} + +#endif /* !__ASSEMBLY__ */ + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 @@ -639,5 +710,45 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, method; \ }) +#ifdef CONFIG_ARM64 + +#define __fail_smccc_1_2(___res) \ + do { \ + if (___res) \ + ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \ + } while (0) + +/* + * arm_smccc_1_2_invoke() - make an SMCCC v1.2 compliant call + * + * @args: SMC args are in the a0..a17 fields of the arm_smcc_1_2_regs structure + * @res: result values from registers 0 to 17 + * + * This macro will make either an HVC call or an SMC call depending on the + * current SMCCC conduit. If no valid conduit is available then -1 + * (SMCCC_RET_NOT_SUPPORTED) is returned in @res.a0 (if supplied). + * + * The return value also provides the conduit that was used. + */ +#define arm_smccc_1_2_invoke(args, res) ({ \ + struct arm_smccc_1_2_regs *__args = args; \ + struct arm_smccc_1_2_regs *__res = res; \ + int method = arm_smccc_1_1_get_conduit(); \ + switch (method) { \ + case SMCCC_CONDUIT_HVC: \ + arm_smccc_1_2_hvc(__args, __res); \ + break; \ + case SMCCC_CONDUIT_SMC: \ + arm_smccc_1_2_smc(__args, __res); \ + break; \ + default: \ + __fail_smccc_1_2(__res); \ + method = SMCCC_CONDUIT_NONE; \ + break; \ + } \ + method; \ + }) +#endif /*CONFIG_ARM64*/ + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index 74169dd0f659..e1634897e159 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -112,6 +112,7 @@ FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) #define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) #define FFA_VERSION_1_1 FFA_PACK_VERSION_INFO(1, 1) +#define FFA_VERSION_1_2 FFA_PACK_VERSION_INFO(1, 2) /** * FF-A specification mentions explicitly about '4K pages'. This should @@ -176,6 +177,7 @@ void ffa_device_unregister(struct ffa_device *ffa_dev); int ffa_driver_register(struct ffa_driver *driver, struct module *owner, const char *mod_name); void ffa_driver_unregister(struct ffa_driver *driver); +void ffa_devices_unregister(void); bool ffa_device_is_valid(struct ffa_device *ffa_dev); #else @@ -188,6 +190,8 @@ ffa_device_register(const struct ffa_partition_info *part_info, static inline void ffa_device_unregister(struct ffa_device *dev) {} +static inline void ffa_devices_unregister(void) {} + static inline int ffa_driver_register(struct ffa_driver *driver, struct module *owner, const char *mod_name) @@ -237,8 +241,12 @@ struct ffa_partition_info { #define FFA_PARTITION_NOTIFICATION_RECV BIT(3) /* partition runs in the AArch64 execution state. */ #define FFA_PARTITION_AARCH64_EXEC BIT(8) +/* partition supports receipt of direct request2 */ +#define FFA_PARTITION_DIRECT_REQ2_RECV BIT(9) +/* partition can send direct request2. */ +#define FFA_PARTITION_DIRECT_REQ2_SEND BIT(10) u32 properties; - u32 uuid[4]; + uuid_t uuid; }; static inline @@ -256,6 +264,10 @@ bool ffa_partition_check_property(struct ffa_device *dev, u32 property) #define ffa_partition_supports_direct_recv(dev) \ ffa_partition_check_property(dev, FFA_PARTITION_DIRECT_RECV) +#define ffa_partition_supports_direct_req2_recv(dev) \ + (ffa_partition_check_property(dev, FFA_PARTITION_DIRECT_REQ2_RECV) && \ + !dev->mode_32bit) + /* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP} which pass data via registers */ struct ffa_send_direct_data { unsigned long data0; /* w3/x3 */ @@ -271,6 +283,8 @@ struct ffa_indirect_msg_hdr { u32 offset; u32 send_recv_id; u32 size; + u32 res1; + uuid_t uuid; }; /* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP}2 which pass data via registers */ @@ -439,7 +453,7 @@ struct ffa_msg_ops { int (*sync_send_receive)(struct ffa_device *dev, struct ffa_send_direct_data *data); int (*indirect_send)(struct ffa_device *dev, void *buf, size_t sz); - int (*sync_send_receive2)(struct ffa_device *dev, const uuid_t *uuid, + int (*sync_send_receive2)(struct ffa_device *dev, struct ffa_send_direct_data2 *data); }; @@ -455,6 +469,7 @@ struct ffa_cpu_ops { typedef void (*ffa_sched_recv_cb)(u16 vcpu, bool is_per_vcpu, void *cb_data); typedef void (*ffa_notifier_cb)(int notify_id, void *cb_data); +typedef void (*ffa_fwk_notifier_cb)(int notify_id, void *cb_data, void *buf); struct ffa_notifier_ops { int (*sched_recv_cb_register)(struct ffa_device *dev, @@ -463,6 +478,10 @@ struct ffa_notifier_ops { int (*notify_request)(struct ffa_device *dev, bool per_vcpu, ffa_notifier_cb cb, void *cb_data, int notify_id); int (*notify_relinquish)(struct ffa_device *dev, int notify_id); + int (*fwk_notify_request)(struct ffa_device *dev, + ffa_fwk_notifier_cb cb, void *cb_data, + int notify_id); + int (*fwk_notify_relinquish)(struct ffa_device *dev, int notify_id); int (*notify_send)(struct ffa_device *dev, int notify_id, bool per_vcpu, u16 vcpu); }; diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 255701e1251b..f652a5028b59 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -46,12 +46,12 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. */ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); -void __init sdei_init(void); +void __init acpi_sdei_init(void); void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } -static inline void sdei_init(void) { } +static inline void acpi_sdei_init(void) { } static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5cc73d7e5b52..1ca9f9e05f4f 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -168,11 +168,6 @@ async_xor_offs(struct page *dest, unsigned int offset, int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum sum_check_flags *result, - struct async_submit_ctl *submit); - -struct dma_async_tx_descriptor * async_xor_val_offs(struct page *dest, unsigned int offset, struct page **src_list, unsigned int *src_offset, int src_cnt, size_t len, enum sum_check_flags *result, diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9b02961d65ee..45f2f278b50a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->atm_options = vcc->atm_options; } +static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, + &sk_atm(vcc)->sk_wmem_alloc)); +} + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); diff --git a/include/linux/audit.h b/include/linux/audit.h index 0050ef288ab3..a394614ccd0b 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -417,7 +417,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_openat2_how(struct open_how *how); -extern void __audit_log_kern_module(char *name); +extern void __audit_log_kern_module(const char *name); extern void __audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar); extern void __audit_tk_injoffset(struct timespec64 offset); extern void __audit_ntp_log(const struct audit_ntp_data *ad); @@ -519,7 +519,7 @@ static inline void audit_openat2_how(struct open_how *how) __audit_openat2_how(how); } -static inline void audit_log_kern_module(char *name) +static inline void audit_log_kern_module(const char *name) { if (!audit_dummy_context()) __audit_log_kern_module(name); @@ -677,9 +677,8 @@ static inline void audit_mmap_fd(int fd, int flags) static inline void audit_openat2_how(struct open_how *how) { } -static inline void audit_log_kern_module(char *name) -{ -} +static inline void audit_log_kern_module(const char *name) +{ } static inline void audit_fanotify(u32 response, struct fanotify_response_info_audit_rule *friar) { } diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 65dd7f154374..4086afd0cc6b 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -254,6 +254,23 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module * void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv); +struct auxiliary_device *auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id); +void auxiliary_device_destroy(void *auxdev); + +struct auxiliary_device *__devm_auxiliary_device_create(struct device *dev, + const char *modname, + const char *devname, + void *platform_data, + int id); + +#define devm_auxiliary_device_create(dev, devname, platform_data) \ + __devm_auxiliary_device_create(dev, KBUILD_MODNAME, devname, \ + platform_data, 0) + /** * module_auxiliary_driver() - Helper macro for registering an auxiliary driver * @__auxiliary_driver: auxiliary driver struct diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 13a11f3c09b8..cf0afa60e4a7 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -154,7 +154,10 @@ enum virtchnl_ops { VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2 = 55, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2 = 56, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2 = 57, - /* opcode 57 - 65 are reserved */ + /* opcode 58 and 59 are reserved */ + VIRTCHNL_OP_1588_PTP_GET_CAPS = 60, + VIRTCHNL_OP_1588_PTP_GET_TIME = 61, + /* opcode 62 - 65 are reserved */ VIRTCHNL_OP_GET_QOS_CAPS = 66, /* opcode 68 through 111 are reserved */ VIRTCHNL_OP_CONFIG_QUEUE_BW = 112, @@ -270,6 +273,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF BIT(27) #define VIRTCHNL_VF_OFFLOAD_FDIR_PF BIT(28) #define VIRTCHNL_VF_OFFLOAD_QOS BIT(29) +#define VIRTCHNL_VF_CAP_PTP BIT(31) #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \ VIRTCHNL_VF_OFFLOAD_VLAN | \ @@ -309,6 +313,60 @@ struct virtchnl_txq_info { VIRTCHNL_CHECK_STRUCT_LEN(24, virtchnl_txq_info); +/* RX descriptor IDs (range from 0 to 63) */ +enum virtchnl_rx_desc_ids { + VIRTCHNL_RXDID_0_16B_BASE = 0, + VIRTCHNL_RXDID_1_32B_BASE = 1, + VIRTCHNL_RXDID_2_FLEX_SQ_NIC = 2, + VIRTCHNL_RXDID_3_FLEX_SQ_SW = 3, + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB = 4, + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL = 5, + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2 = 6, + VIRTCHNL_RXDID_7_HW_RSVD = 7, + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC = 16, + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN = 17, + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4 = 18, + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6 = 19, + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW = 20, + VIRTCHNL_RXDID_21_COMMS_AUX_TCP = 21, + /* 22 through 63 are reserved */ +}; + +#define VIRTCHNL_RXDID_BIT(x) BIT_ULL(VIRTCHNL_RXDID_##x) + +/* RX descriptor ID bitmasks */ +enum virtchnl_rx_desc_id_bitmasks { + VIRTCHNL_RXDID_0_16B_BASE_M = VIRTCHNL_RXDID_BIT(0_16B_BASE), + VIRTCHNL_RXDID_1_32B_BASE_M = VIRTCHNL_RXDID_BIT(1_32B_BASE), + VIRTCHNL_RXDID_2_FLEX_SQ_NIC_M = VIRTCHNL_RXDID_BIT(2_FLEX_SQ_NIC), + VIRTCHNL_RXDID_3_FLEX_SQ_SW_M = VIRTCHNL_RXDID_BIT(3_FLEX_SQ_SW), + VIRTCHNL_RXDID_4_FLEX_SQ_NIC_VEB_M = VIRTCHNL_RXDID_BIT(4_FLEX_SQ_NIC_VEB), + VIRTCHNL_RXDID_5_FLEX_SQ_NIC_ACL_M = VIRTCHNL_RXDID_BIT(5_FLEX_SQ_NIC_ACL), + VIRTCHNL_RXDID_6_FLEX_SQ_NIC_2_M = VIRTCHNL_RXDID_BIT(6_FLEX_SQ_NIC_2), + VIRTCHNL_RXDID_7_HW_RSVD_M = VIRTCHNL_RXDID_BIT(7_HW_RSVD), + /* 8 through 15 are reserved */ + VIRTCHNL_RXDID_16_COMMS_GENERIC_M = VIRTCHNL_RXDID_BIT(16_COMMS_GENERIC), + VIRTCHNL_RXDID_17_COMMS_AUX_VLAN_M = VIRTCHNL_RXDID_BIT(17_COMMS_AUX_VLAN), + VIRTCHNL_RXDID_18_COMMS_AUX_IPV4_M = VIRTCHNL_RXDID_BIT(18_COMMS_AUX_IPV4), + VIRTCHNL_RXDID_19_COMMS_AUX_IPV6_M = VIRTCHNL_RXDID_BIT(19_COMMS_AUX_IPV6), + VIRTCHNL_RXDID_20_COMMS_AUX_FLOW_M = VIRTCHNL_RXDID_BIT(20_COMMS_AUX_FLOW), + VIRTCHNL_RXDID_21_COMMS_AUX_TCP_M = VIRTCHNL_RXDID_BIT(21_COMMS_AUX_TCP), + /* 22 through 63 are reserved */ +}; + +/* virtchnl_rxq_info_flags - definition of bits in the flags field of the + * virtchnl_rxq_info structure. + * + * @VIRTCHNL_PTP_RX_TSTAMP: request to enable Rx timestamping + * + * Other flag bits are currently reserved and they may be extended in the + * future. + */ +enum virtchnl_rxq_info_flags { + VIRTCHNL_PTP_RX_TSTAMP = BIT(0), +}; + /* VIRTCHNL_OP_CONFIG_RX_QUEUE * VF sends this message to set up parameters for one RX queue. * External data buffer contains one instance of virtchnl_rxq_info. @@ -331,8 +389,14 @@ struct virtchnl_rxq_info { u32 databuffer_size; u32 max_pkt_size; u8 crc_disable; - u8 rxdid; - u8 pad1[2]; + /* see enum virtchnl_rx_desc_ids; + * only used when VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC is supported. Note + * that when the offload is not supported, the descriptor format aligns + * with VIRTCHNL_RXDID_1_32B_BASE. + */ + enum virtchnl_rx_desc_ids rxdid:8; + enum virtchnl_rxq_info_flags flags:8; /* see virtchnl_rxq_info_flags */ + u8 pad1; u64 dma_ring_addr; /* see enum virtchnl_rx_hsplit; deprecated with AVF 1.0 */ @@ -1032,10 +1096,6 @@ struct virtchnl_filter { VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter); -struct virtchnl_supported_rxdids { - u64 supported_rxdids; -}; - /* VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other @@ -1283,7 +1343,7 @@ struct virtchnl_proto_hdrs { * 2 - from the second inner layer * .... **/ - int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ + u32 count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */ union { struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS]; @@ -1335,7 +1395,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action); struct virtchnl_filter_action_set { /* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */ - int count; + u32 count; struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS]; }; @@ -1425,6 +1485,61 @@ struct virtchnl_fdir_del { VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del); +#define VIRTCHNL_1588_PTP_CAP_RX_TSTAMP BIT(1) +#define VIRTCHNL_1588_PTP_CAP_READ_PHC BIT(2) + +/** + * struct virtchnl_ptp_caps - Defines the PTP caps available to the VF. + * @caps: On send, VF sets what capabilities it requests. On reply, PF + * indicates what has been enabled for this VF. The PF shall not set + * bits which were not requested by the VF. + * @rsvd: Reserved bits for future extension. + * + * Structure that defines the PTP capabilities available to the VF. The VF + * sends VIRTCHNL_OP_1588_PTP_GET_CAPS, and must fill in the ptp_caps field + * indicating what capabilities it is requesting. The PF will respond with the + * same message with the virtchnl_ptp_caps structure indicating what is + * enabled for the VF. + * + * VIRTCHNL_1588_PTP_CAP_RX_TSTAMP indicates that the VF receive queues have + * receive timestamps enabled in the flexible descriptors. Note that this + * requires a VF to also negotiate to enable advanced flexible descriptors in + * the receive path instead of the default legacy descriptor format. + * + * VIRTCHNL_1588_PTP_CAP_READ_PHC indicates that the VF may read the PHC time + * via the VIRTCHNL_OP_1588_PTP_GET_TIME command. + * + * Note that in the future, additional capability flags may be added which + * indicate additional extended support. All fields marked as reserved by this + * header will be set to zero. VF implementations should verify this to ensure + * that future extensions do not break compatibility. + */ +struct virtchnl_ptp_caps { + u32 caps; + u8 rsvd[44]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(48, virtchnl_ptp_caps); + +/** + * struct virtchnl_phc_time - Contains the 64bits of PHC clock time in ns. + * @time: PHC time in nanoseconds + * @rsvd: Reserved for future extension + * + * Structure received with VIRTCHNL_OP_1588_PTP_GET_TIME. Contains the 64bits + * of PHC clock time in nanoseconds. + * + * VIRTCHNL_OP_1588_PTP_GET_TIME may be sent to request the current time of + * the PHC. This op is available in case direct access via the PHC registers + * is not available. + */ +struct virtchnl_phc_time { + u64 time; + u8 rsvd[8]; +}; + +VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_phc_time); + struct virtchnl_shaper_bw { /* Unit is Kbps */ u32 committed; @@ -1757,6 +1872,12 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, } } break; + case VIRTCHNL_OP_1588_PTP_GET_CAPS: + valid_len = sizeof(struct virtchnl_ptp_caps); + break; + case VIRTCHNL_OP_1588_PTP_GET_TIME: + valid_len = sizeof(struct virtchnl_phc_time); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 8e7af9a03b41..e721148c95d0 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -249,6 +249,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode) { #ifdef CONFIG_LOCKDEP WARN_ON_ONCE(debug_locks && + (inode->i_sb->s_iflags & SB_I_CGROUPWB) && (!lockdep_is_held(&inode->i_lock) && !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) && !lockdep_is_held(&inode->i_wb->list_lock))); diff --git a/include/linux/backlight.h b/include/linux/backlight.h index f5652e5a9060..10e626db7eee 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -12,7 +12,6 @@ #include <linux/device.h> #include <linux/fb.h> #include <linux/mutex.h> -#include <linux/notifier.h> #include <linux/types.h> /** @@ -279,11 +278,6 @@ struct backlight_device { const struct backlight_ops *ops; /** - * @fb_notif: The framebuffer notifier block - */ - struct notifier_block fb_notif; - - /** * @entry: List entry of all registered backlight devices */ struct list_head entry; @@ -294,15 +288,7 @@ struct backlight_device { struct device dev; /** - * @fb_bl_on: The state of individual fbdev's. - * - * Multiple fbdev's may share one backlight device. The fb_bl_on - * records the state of the individual fbdev. - */ - bool fb_bl_on[FB_MAX]; - - /** - * @use_count: The number of uses of fb_bl_on. + * @use_count: The number of unblanked displays. */ int use_count; }; @@ -408,6 +394,22 @@ struct backlight_device *backlight_device_get_by_type(enum backlight_type type); int backlight_device_set_brightness(struct backlight_device *bd, unsigned long brightness); +#if IS_REACHABLE(CONFIG_BACKLIGHT_CLASS_DEVICE) +void backlight_notify_blank(struct backlight_device *bd, + struct device *display_dev, + bool fb_on, bool prev_fb_on); +void backlight_notify_blank_all(struct device *display_dev, + bool fb_on, bool prev_fb_on); +#else +static inline void backlight_notify_blank(struct backlight_device *bd, + struct device *display_dev, + bool fb_on, bool prev_fb_on) +{ } +static inline void backlight_notify_blank_all(struct device *display_dev, + bool fb_on, bool prev_fb_on) +{ } +#endif + #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev) /** diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 670f2dae692f..996493917f36 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -48,11 +48,11 @@ struct badblocks_context { int ack; }; -int badblocks_check(struct badblocks *bb, sector_t s, int sectors, - sector_t *first_bad, int *bad_sectors); -int badblocks_set(struct badblocks *bb, sector_t s, int sectors, - int acknowledged); -int badblocks_clear(struct badblocks *bb, sector_t s, int sectors); +int badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors, + sector_t *first_bad, sector_t *bad_sectors); +bool badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors, + int acknowledged); +bool badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors); void ack_all_badblocks(struct badblocks *bb); ssize_t badblocks_show(struct badblocks *bb, char *page, int unack); ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, diff --git a/include/linux/bcm963xx_nvram.h b/include/linux/bcm963xx_nvram.h index c8c7f01159fe..48830bf18042 100644 --- a/include/linux/bcm963xx_nvram.h +++ b/include/linux/bcm963xx_nvram.h @@ -81,25 +81,21 @@ static int __maybe_unused bcm963xx_nvram_checksum( const struct bcm963xx_nvram *nvram, u32 *expected_out, u32 *actual_out) { + const u32 zero = 0; u32 expected, actual; size_t len; if (nvram->version <= 4) { expected = nvram->checksum_v4; - len = BCM963XX_NVRAM_V4_SIZE - sizeof(u32); + len = BCM963XX_NVRAM_V4_SIZE; } else { expected = nvram->checksum_v5; - len = BCM963XX_NVRAM_V5_SIZE - sizeof(u32); + len = BCM963XX_NVRAM_V5_SIZE; } - /* - * Calculate the CRC32 value for the nvram with a checksum value - * of 0 without modifying or copying the nvram by combining: - * - The CRC32 of the nvram without the checksum value - * - The CRC32 of a zero checksum value (which is also 0) - */ - actual = crc32_le_combine( - crc32_le(~0, (u8 *)nvram, len), 0, sizeof(u32)); + /* Calculate the CRC32 of the nvram with the checksum field set to 0. */ + actual = crc32_le(~0, nvram, len - sizeof(u32)); + actual = crc32_le(actual, &zero, sizeof(u32)); if (expected_out) *expected_out = expected; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 3305c849abd6..65abd5ab8836 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -44,7 +44,12 @@ struct linux_binprm { */ point_of_no_return:1, /* Set when "comm" must come from the dentry. */ - comm_from_dentry:1; + comm_from_dentry:1, + /* + * Set by user space to check executability according to the + * caller's environment. + */ + is_check:1; struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; @@ -59,7 +64,7 @@ struct linux_binprm { const char *fdpath; /* generated filename for execveat */ unsigned interp_flags; int execfd; /* File descriptor of the executable */ - unsigned long loader, exec; + unsigned long exec; struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */ @@ -85,7 +90,6 @@ struct linux_binfmt { struct list_head lh; struct module *module; int (*load_binary)(struct linux_binprm *); - int (*load_shlib)(struct file *); #ifdef CONFIG_COREDUMP int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index dbf0f74c1529..0a25716820fe 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -7,29 +7,28 @@ enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ - BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ - BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ - BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ - BIP_COPY_USER = 1 << 5, /* Kernel bounce buffer in use */ + BIP_DISK_NOCHECK = 1 << 2, /* disable disk integrity checking */ + BIP_IP_CHECKSUM = 1 << 3, /* IP checksum */ + BIP_COPY_USER = 1 << 4, /* Kernel bounce buffer in use */ + BIP_CHECK_GUARD = 1 << 5, /* guard check */ + BIP_CHECK_REFTAG = 1 << 6, /* reftag check */ + BIP_CHECK_APPTAG = 1 << 7, /* apptag check */ }; struct bio_integrity_payload { - struct bio *bip_bio; /* parent bio */ - struct bvec_iter bip_iter; unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ unsigned short bip_flags; /* control flags */ - - struct bvec_iter bio_iter; /* for rewinding parent bio */ - - struct work_struct bip_work; /* I/O completion */ + u16 app_tag; /* application tag value */ struct bio_vec *bip_vec; - struct bio_vec bip_inline_vecs[];/* embedded bvec array */ }; +#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \ + BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG) + #ifdef CONFIG_BLK_DEV_INTEGRITY #define bip_for_each_vec(bvl, bip, iter) \ @@ -68,19 +67,19 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip, bip->bip_iter.bi_sector = seed; } +void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip, + struct bio_vec *bvecs, unsigned int nr_vecs); struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr); int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); -int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len); +int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter); +int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta); void bio_integrity_unmap_user(struct bio *bio); bool bio_integrity_prep(struct bio *bio); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); void bio_integrity_trim(struct bio *bio); int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask); -int bioset_integrity_create(struct bio_set *bs, int pool_size); -void bioset_integrity_free(struct bio_set *bs); -void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ @@ -89,17 +88,12 @@ static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) return NULL; } -static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) -{ - return 0; -} - -static inline void bioset_integrity_free(struct bio_set *bs) +static inline int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter) { + return -EINVAL; } -static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, - ssize_t len) +static inline int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta) { return -EINVAL; } @@ -128,10 +122,6 @@ static inline void bio_integrity_trim(struct bio *bio) { } -static inline void bio_integrity_init(void) -{ -} - static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) { return false; diff --git a/include/linux/bio.h b/include/linux/bio.h index 7a1b3b1a8fed..46ffac5caab7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -11,6 +11,7 @@ #include <linux/uio.h> #define BIO_MAX_VECS 256U +#define BIO_MAX_INLINE_VECS UIO_MAXIOV struct queue_limits; @@ -19,9 +20,6 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs) return min(nr_segs, BIO_MAX_VECS); } -#define bio_prio(bio) (bio)->bi_ioprio -#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) - #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) @@ -293,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); @@ -405,7 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; -extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); extern void bio_uninit(struct bio *); @@ -416,12 +413,34 @@ int __must_check bio_add_page(struct bio *bio, struct page *page, unsigned len, unsigned off); bool __must_check bio_add_folio(struct bio *bio, struct folio *folio, size_t len, size_t off); -extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, - unsigned int, unsigned int); void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); +void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); + +/** + * bio_add_max_vecs - number of bio_vecs needed to add data to a bio + * @kaddr: kernel virtual address to add + * @len: length in bytes to add + * + * Calculate how many bio_vecs need to be allocated to add the kernel virtual + * address range in [@kaddr:@len] in the worse case. + */ +static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len) +{ + if (is_vmalloc_addr(kaddr)) + return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE); + return 1; +} + +unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len); +bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len); + +int submit_bio_wait(struct bio *bio); +int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, + size_t len, enum req_op op); + int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); @@ -630,10 +649,6 @@ struct bio_set { mempool_t bio_pool; mempool_t bvec_pool; -#if defined(CONFIG_BLK_DEV_INTEGRITY) - mempool_t bio_integrity_pool; - mempool_t bvec_integrity_pool; -#endif unsigned int back_pad; /* diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index bbc4730a6505..c0989b5b0407 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -13,7 +13,7 @@ * Don't use this unless you really need to: spin_lock() and spin_unlock() * are significantly faster. */ -static inline void bit_spin_lock(int bitnum, unsigned long *addr) +static __always_inline void bit_spin_lock(int bitnum, unsigned long *addr) { /* * Assuming the lock is uncontended, this never enters @@ -38,7 +38,7 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr) /* * Return true if it was acquired */ -static inline int bit_spin_trylock(int bitnum, unsigned long *addr) +static __always_inline int bit_spin_trylock(int bitnum, unsigned long *addr) { preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -54,7 +54,7 @@ static inline int bit_spin_trylock(int bitnum, unsigned long *addr) /* * bit-based spin_unlock() */ -static inline void bit_spin_unlock(int bitnum, unsigned long *addr) +static __always_inline void bit_spin_unlock(int bitnum, unsigned long *addr) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); @@ -71,7 +71,7 @@ static inline void bit_spin_unlock(int bitnum, unsigned long *addr) * non-atomic version, which can be used eg. if the bit lock itself is * protecting the rest of the flags in the word. */ -static inline void __bit_spin_unlock(int bitnum, unsigned long *addr) +static __always_inline void __bit_spin_unlock(int bitnum, unsigned long *addr) { #ifdef CONFIG_DEBUG_SPINLOCK BUG_ON(!test_bit(bitnum, addr)); diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 63928f173223..6d9a53db54b6 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -8,6 +8,7 @@ #define _LINUX_BITFIELD_H #include <linux/build_bug.h> +#include <linux/typecheck.h> #include <asm/byteorder.h> /* @@ -38,8 +39,7 @@ * FIELD_PREP(REG_FIELD_D, 0x40); * * Modify: - * reg &= ~REG_FIELD_C; - * reg |= FIELD_PREP(REG_FIELD_C, c); + * FIELD_MODIFY(REG_FIELD_C, ®, c); */ #define __bf_shf(x) (__builtin_ffsll(x) - 1) @@ -156,6 +156,23 @@ (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ }) +/** + * FIELD_MODIFY() - modify a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg_p: pointer to the memory that should be updated + * @_val: value to store in the bitfield + * + * FIELD_MODIFY() modifies the set of bits in @_reg_p specified by @_mask, + * by replacing them with the bitfield value passed in as @_val. + */ +#define FIELD_MODIFY(_mask, _reg_p, _val) \ + ({ \ + typecheck_pointer(_reg_p); \ + __BF_FIELD_CHECK(_mask, *(_reg_p), _val, "FIELD_MODIFY: "); \ + *(_reg_p) &= ~(_mask); \ + *(_reg_p) |= (((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask)); \ + }) + extern void __compiletime_error("value doesn't fit into mask") __field_overflow(void); extern void __compiletime_error("bad bitfield mask") diff --git a/include/linux/bitmap-str.h b/include/linux/bitmap-str.h index 17caeca94cab..53d3e1b32d3d 100644 --- a/include/linux/bitmap-str.h +++ b/include/linux/bitmap-str.h @@ -2,12 +2,14 @@ #ifndef __LINUX_BITMAP_STR_H #define __LINUX_BITMAP_STR_H +#include <linux/types.h> + int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); -extern int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); -extern int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, - int nmaskbits, loff_t off, size_t count); +int bitmap_print_bitmask_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, + loff_t off, size_t count); +int bitmap_print_list_to_buf(char *buf, const unsigned long *maskp, int nmaskbits, + loff_t off, size_t count); int bitmap_parse(const char *buf, unsigned int buflen, unsigned long *dst, int nbits); int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 262b6596eca5..595217b7a6e7 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -23,7 +23,7 @@ struct device; * * Function implementations generic to all architectures are in * lib/bitmap.c. Functions implementations that are architecture - * specific are in various include/asm-<arch>/bitops.h headers + * specific are in various arch/<arch>/include/asm/bitops.h headers * and other arch/<arch> specific files. * * See lib/bitmap.c for more details. @@ -560,9 +560,9 @@ void bitmap_replace(unsigned long *dst, * ...0..11...0..10 * dst: 0000001100000010 * - * A relationship exists between bitmap_scatter() and bitmap_gather(). + * A relationship exists between bitmap_scatter() and bitmap_gather(). See + * bitmap_gather() for the bitmap gather detailed operations. TL;DR: * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation. - * See bitmap_scatter() for details related to this relationship. */ static __always_inline void bitmap_scatter(unsigned long *dst, const unsigned long *src, @@ -608,7 +608,9 @@ void bitmap_scatter(unsigned long *dst, const unsigned long *src, * dst: 0000000000011010 * * A relationship exists between bitmap_gather() and bitmap_scatter(). See - * bitmap_scatter() for the bitmap scatter detailed operations. + * bitmap_scatter() for the bitmap scatter detailed operations. TL;DR: + * bitmap_scatter() can be seen as the 'reverse' bitmap_gather() operation. + * * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n). * The operation bitmap_gather(result, scattered, mask, n) leads to a result * equal or equivalent to src. diff --git a/include/linux/bitops.h b/include/linux/bitops.h index ba35bbf07798..9be2d50da09a 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -8,7 +8,6 @@ #include <uapi/linux/kernel.h> -#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) @@ -230,6 +229,37 @@ static inline int get_count_order_long(unsigned long l) } /** + * parity8 - get the parity of an u8 value + * @value: the value to be examined + * + * Determine the parity of the u8 argument. + * + * Returns: + * 0 for even parity, 1 for odd parity + * + * Note: This function informs you about the current parity. Example to bail + * out when parity is odd: + * + * if (parity8(val) == 1) + * return -EBADMSG; + * + * If you need to calculate a parity bit, you need to draw the conclusion from + * this result yourself. Example to enforce odd parity, parity bit is bit 7: + * + * if (parity8(val) == 0) + * val ^= BIT(7); + */ +static inline int parity8(u8 val) +{ + /* + * One explanation of this algorithm: + * https://funloop.org/codex/problem/parity/README.html + */ + val ^= val >> 4; + return (0x6996 >> (val & 0xf)) & 1; +} + +/** * __ffs64 - find first set bit in a 64 bit word * @word: The 64 bit word * diff --git a/include/linux/bits.h b/include/linux/bits.h index 60044b608817..7ad056219115 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -12,6 +12,7 @@ #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,17 +20,68 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include <linux/build_bug.h> -#define GENMASK_INPUT_CHECK(h, l) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((l) > (h)), (l) > (h), 0))) -#else +#include <linux/compiler.h> +#include <linux/overflow.h> + +#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) + +/* + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 + */ +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) + +/* + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: + * + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) + */ +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ #define GENMASK_INPUT_CHECK(h, l) 0 -#endif + +#endif /* !defined(__ASSEMBLY__) */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) @@ -41,7 +93,7 @@ * Missing asm support * * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __init128' data + * in the asm code, as it shifts an 'unsigned __int128' data * type instead of direct representation of 128 bit constants * such as long and unsigned long. The fundamental problem is * that a 128 bit constant will get silently truncated by the diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index 90ab33cb5d0e..4f39e9cd7576 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -57,6 +57,62 @@ struct blk_crypto_ll_ops { int (*keyslot_evict)(struct blk_crypto_profile *profile, const struct blk_crypto_key *key, unsigned int slot); + + /** + * @derive_sw_secret: Derive the software secret from a hardware-wrapped + * key in ephemerally-wrapped form. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * Must return 0 on success, -EBADMSG if the key is invalid, or another + * -errno code on other errors. + */ + int (*derive_sw_secret)(struct blk_crypto_profile *profile, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); + + /** + * @import_key: Create a hardware-wrapped key by importing a raw key. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * On success, must write the new key in long-term wrapped form to + * @lt_key and return its size in bytes. On failure, must return a + * -errno value. + */ + int (*import_key)(struct blk_crypto_profile *profile, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + + /** + * @generate_key: Generate a hardware-wrapped key. + * + * This only needs to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED + * is supported. + * + * On success, must write the new key in long-term wrapped form to + * @lt_key and return its size in bytes. On failure, must return a + * -errno value. + */ + int (*generate_key)(struct blk_crypto_profile *profile, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + + /** + * @prepare_key: Prepare a hardware-wrapped key to be used. + * + * Prepare a hardware-wrapped key to be used by converting it from + * long-term wrapped form to ephemerally-wrapped form. This only needs + * to be implemented if BLK_CRYPTO_KEY_TYPE_HW_WRAPPED is supported. + * + * On success, must write the key in ephemerally-wrapped form to + * @eph_key and return its size in bytes. On failure, must return + * -EBADMSG if the key is invalid, or another -errno on other error. + */ + int (*prepare_key)(struct blk_crypto_profile *profile, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); }; /** @@ -85,6 +141,12 @@ struct blk_crypto_profile { unsigned int max_dun_bytes_supported; /** + * @key_types_supported: A bitmask of the supported key types: + * BLK_CRYPTO_KEY_TYPE_RAW and/or BLK_CRYPTO_KEY_TYPE_HW_WRAPPED. + */ + unsigned int key_types_supported; + + /** * @modes_supported: Array of bitmasks that specifies whether each * combination of crypto mode and data unit size is supported. * Specifically, the i'th bit of modes_supported[crypto_mode] is set if @@ -143,6 +205,17 @@ void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); +int blk_crypto_import_key(struct blk_crypto_profile *profile, + const u8 *raw_key, size_t raw_key_size, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + +int blk_crypto_generate_key(struct blk_crypto_profile *profile, + u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + +int blk_crypto_prepare_key(struct blk_crypto_profile *profile, + const u8 *lt_key, size_t lt_key_size, + u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE]); + void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent, const struct blk_crypto_profile *child); diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 5e5822c18ee4..58b0c5254a67 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -6,7 +6,9 @@ #ifndef __LINUX_BLK_CRYPTO_H #define __LINUX_BLK_CRYPTO_H +#include <linux/minmax.h> #include <linux/types.h> +#include <uapi/linux/blk-crypto.h> enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_INVALID, @@ -17,7 +19,55 @@ enum blk_crypto_mode_num { BLK_ENCRYPTION_MODE_MAX, }; -#define BLK_CRYPTO_MAX_KEY_SIZE 64 +/* + * Supported types of keys. Must be bitflags due to their use in + * blk_crypto_profile::key_types_supported. + */ +enum blk_crypto_key_type { + /* + * Raw keys (i.e. "software keys"). These keys are simply kept in raw, + * plaintext form in kernel memory. + */ + BLK_CRYPTO_KEY_TYPE_RAW = 0x1, + + /* + * Hardware-wrapped keys. These keys are only present in kernel memory + * in ephemerally-wrapped form, and they can only be unwrapped by + * dedicated hardware. For details, see the "Hardware-wrapped keys" + * section of Documentation/block/inline-encryption.rst. + */ + BLK_CRYPTO_KEY_TYPE_HW_WRAPPED = 0x2, +}; + +/* + * Currently the maximum raw key size is 64 bytes, as that is the key size of + * BLK_ENCRYPTION_MODE_AES_256_XTS which takes the longest key. + * + * The maximum hardware-wrapped key size depends on the hardware's key wrapping + * algorithm, which is a hardware implementation detail, so it isn't precisely + * specified. But currently 128 bytes is plenty in practice. Implementations + * are recommended to wrap a 32-byte key for the hardware KDF with AES-256-GCM, + * which should result in a size closer to 64 bytes than 128. + * + * Both of these values can trivially be increased if ever needed. + */ +#define BLK_CRYPTO_MAX_RAW_KEY_SIZE 64 +#define BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE 128 + +#define BLK_CRYPTO_MAX_ANY_KEY_SIZE \ + MAX(BLK_CRYPTO_MAX_RAW_KEY_SIZE, BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE) + +/* + * Size of the "software secret" which can be derived from a hardware-wrapped + * key. This is currently always 32 bytes. Note, the choice of 32 bytes + * assumes that the software secret is only used directly for algorithms that + * don't require more than a 256-bit key to get the desired security strength. + * If it were to be used e.g. directly as an AES-256-XTS key, then this would + * need to be increased (which is possible if hardware supports it, but care + * would need to be taken to avoid breaking users who need exactly 32 bytes). + */ +#define BLK_CRYPTO_SW_SECRET_SIZE 32 + /** * struct blk_crypto_config - an inline encryption key's crypto configuration * @crypto_mode: encryption algorithm this key is for @@ -26,20 +76,23 @@ enum blk_crypto_mode_num { * ciphertext. This is always a power of 2. It might be e.g. the * filesystem block size or the disk sector size. * @dun_bytes: the maximum number of bytes of DUN used when using this key + * @key_type: the type of this key -- either raw or hardware-wrapped */ struct blk_crypto_config { enum blk_crypto_mode_num crypto_mode; unsigned int data_unit_size; unsigned int dun_bytes; + enum blk_crypto_key_type key_type; }; /** * struct blk_crypto_key - an inline encryption key - * @crypto_cfg: the crypto configuration (like crypto_mode, key size) for this - * key + * @crypto_cfg: the crypto mode, data unit size, key type, and other + * characteristics of this key and how it will be used * @data_unit_size_bits: log2 of data_unit_size - * @size: size of this key in bytes (determined by @crypto_cfg.crypto_mode) - * @raw: the raw bytes of this key. Only the first @size bytes are used. + * @size: size of this key in bytes. The size of a raw key is fixed for a given + * crypto mode, but the size of a hardware-wrapped key can vary. + * @bytes: the bytes of this key. Only the first @size bytes are significant. * * A blk_crypto_key is immutable once created, and many bios can reference it at * the same time. It must not be freed until all bios using it have completed @@ -49,7 +102,7 @@ struct blk_crypto_key { struct blk_crypto_config crypto_cfg; unsigned int data_unit_size_bits; unsigned int size; - u8 raw[BLK_CRYPTO_MAX_KEY_SIZE]; + u8 bytes[BLK_CRYPTO_MAX_ANY_KEY_SIZE]; }; #define BLK_CRYPTO_MAX_IV_SIZE 32 @@ -87,7 +140,9 @@ bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, unsigned int bytes, const u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]); -int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *key_bytes, size_t key_size, + enum blk_crypto_key_type key_type, enum blk_crypto_mode_num crypto_mode, unsigned int dun_bytes, unsigned int data_unit_size); @@ -103,6 +158,10 @@ bool blk_crypto_config_supported_natively(struct block_device *bdev, bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg); +int blk_crypto_derive_sw_secret(struct block_device *bdev, + const u8 *eph_key, size_t eph_key_size, + u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE]); + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool bio_has_crypt_ctx(struct bio *bio) diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h deleted file mode 100644 index ca544e1d3508..000000000000 --- a/include/linux/blk-mq-pci.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_PCI_H -#define _LINUX_BLK_MQ_PCI_H - -struct blk_mq_queue_map; -struct pci_dev; - -void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset); - -#endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h deleted file mode 100644 index 13226e9b22dd..000000000000 --- a/include/linux/blk-mq-virtio.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_BLK_MQ_VIRTIO_H -#define _LINUX_BLK_MQ_VIRTIO_H - -struct blk_mq_queue_map; -struct virtio_device; - -void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, - struct virtio_device *vdev, int first_vec); - -#endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7b19b83349cf..de8c85a03bb7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -9,6 +9,7 @@ #include <linux/prefetch.h> #include <linux/srcu.h> #include <linux/rw_hint.h> +#include <linux/rwsem.h> struct blk_mq_tags; struct blk_flush_queue; @@ -28,7 +29,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); typedef __u32 __bitwise req_flags_t; /* Keep rqf_name[] in sync with the definitions below */ -enum { +enum rqf_flags { /* drive already may have started this one */ __RQF_STARTED, /* request for flush sequence */ @@ -296,13 +297,6 @@ enum blk_eh_timer_return { BLK_EH_RESET_TIMER, }; -/* Keep alloc_policy_name[] in sync with the definitions below */ -enum { - BLK_TAG_ALLOC_FIFO, /* allocate starting from 0 */ - BLK_TAG_ALLOC_RR, /* allocate starting from last allocated tag */ - BLK_TAG_ALLOC_MAX -}; - /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware * block device @@ -513,6 +507,9 @@ enum hctx_type { * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). + * @update_nr_hwq_lock: + * Synchronize updating nr_hw_queues with add/del disk & + * switching elevator. */ struct blk_mq_tag_set { const struct blk_mq_ops *ops; @@ -534,6 +531,8 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + + struct rw_semaphore update_nr_hwq_lock; }; /** @@ -668,7 +667,6 @@ struct blk_mq_ops { /* Keep hctx_flag_name[] in sync with the definitions below */ enum { - BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, /* * Set when this device requires underlying blk-mq device for @@ -677,23 +675,20 @@ enum { BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 4, - /* Do not allow an I/O scheduler to be configured. */ - BLK_MQ_F_NO_SCHED = 1 << 5, + + /* + * Alloc tags on a round-robin base instead of the first available one. + */ + BLK_MQ_F_TAG_RR = 1 << 5, /* * Select 'none' during queue registration in case of a single hwq * or shared hwqs instead of 'mq-deadline'. */ BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6, - BLK_MQ_F_ALLOC_POLICY_START_BIT = 7, - BLK_MQ_F_ALLOC_POLICY_BITS = 1, + + BLK_MQ_F_MAX = 1 << 7, }; -#define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ - ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ - ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) -#define BLK_ALLOC_POLICY_TO_MQ_FLAG(policy) \ - ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ - << BLK_MQ_F_ALLOC_POLICY_START_BIT) #define BLK_MQ_MAX_DEPTH (10240) #define BLK_MQ_NO_HCTX_IDX (-1U) @@ -863,12 +858,20 @@ static inline bool blk_mq_is_reserved_rq(struct request *rq) return rq->rq_flags & RQF_RESV; } -/* +/** + * blk_mq_add_to_batch() - add a request to the completion batch + * @req: The request to add to batch + * @iob: The batch to add the request + * @is_error: Specify true if the request failed with an error + * @complete: The completaion handler for the request + * * Batched completions only work when there is no I/O error and no special * ->end_io handler. + * + * Return: true when the request was added to the batch, otherwise false */ static inline bool blk_mq_add_to_batch(struct request *req, - struct io_comp_batch *iob, int ioerror, + struct io_comp_batch *iob, bool is_error, void (*complete)(struct io_comp_batch *)) { /* @@ -876,7 +879,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, * 1) No batch container * 2) Has scheduler data attached * 3) Not a passthrough request and end_io set - * 4) Not a passthrough request and an ioerror + * 4) Not a passthrough request and failed with an error */ if (!iob) return false; @@ -885,7 +888,7 @@ static inline bool blk_mq_add_to_batch(struct request *req, if (!blk_rq_is_passthrough(req)) { if (req->end_io) return false; - if (ioerror < 0) + if (is_error) return false; } @@ -921,8 +924,22 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset); -void blk_mq_freeze_queue(struct request_queue *q); -void blk_mq_unfreeze_queue(struct request_queue *q); +void blk_mq_freeze_queue_nomemsave(struct request_queue *q); +void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q); +static inline unsigned int __must_check +blk_mq_freeze_queue(struct request_queue *q) +{ + unsigned int memflags = memalloc_noio_save(); + + blk_mq_freeze_queue_nomemsave(q); + return memflags; +} +static inline void +blk_mq_unfreeze_queue(struct request_queue *q, unsigned int memflags) +{ + blk_mq_unfreeze_queue_nomemrestore(q); + memalloc_noio_restore(memflags); +} void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, @@ -931,6 +948,8 @@ void blk_mq_unfreeze_queue_non_owner(struct request_queue *q); void blk_freeze_queue_start_non_owner(struct request_queue *q); void blk_mq_map_queues(struct blk_mq_queue_map *qmap); +void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, + struct device *dev, unsigned int offset); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); @@ -987,14 +1006,6 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } -static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, - unsigned int nr_segs) -{ - rq->nr_phys_segments = nr_segs; - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; -} - void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, struct lock_class_key *key); @@ -1026,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *, int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); -int blk_rq_map_kern(struct request_queue *, struct request *, void *, - unsigned int, gfp_t); +int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len, + gfp_t gfp); int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); @@ -1168,14 +1179,13 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) return max_t(unsigned short, rq->nr_phys_segments, 1); } -int __blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist, struct scatterlist **last_sg); -static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq, - struct scatterlist *sglist) +int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist, + struct scatterlist **last_sg); +static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist) { struct scatterlist *last_sg = NULL; - return __blk_rq_map_sg(q, rq, sglist, &last_sg); + return __blk_rq_map_sg(rq, sglist, &last_sg); } void blk_dump_rq_flags(struct request *, char *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dce7615c35e7..930daff207df 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,6 +220,7 @@ struct bio { unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; enum rw_hint bi_write_hint; + u8 bi_write_stream; blk_status_t bi_status; atomic_t __bi_remaining; @@ -286,7 +287,6 @@ struct bio { enum { BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */ BIO_CLONED, /* doesn't own data */ - BIO_BOUNCED, /* bio is a bounce bio */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ @@ -296,6 +296,14 @@ enum { * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + /* + * This bio has completed bps throttling at the single tg granularity, + * which is different from BIO_BPS_THROTTLED. When the bio is enqueued + * into the sq->queued of the upper tg, or is about to be dispatched, + * this flag needs to be cleared. Since blk-throttle and rq_qos are not + * on the same hierarchical level, reuse the value. + */ + BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED, BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */ @@ -342,11 +350,11 @@ enum req_op { /* Close a zone */ REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)13, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)13, + REQ_OP_ZONE_RESET = (__force blk_opf_t)15, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 315899779af1..620345ce3aaa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -182,7 +182,6 @@ struct gendisk { struct list_head slave_bdevs; #endif struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_ZONED @@ -218,6 +217,8 @@ struct gendisk { * devices that do not have multiple independent access ranges. */ struct blk_independent_access_ranges *ia_ranges; + + struct mutex rqos_state_mutex; /* rqos state change mutex */ }; /** @@ -268,10 +269,21 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* + * We should strive for 1 << (PAGE_SHIFT + MAX_PAGECACHE_ORDER) + * however we constrain this to what we can validate and test. + */ +#define BLK_MAX_BLOCK_SIZE SZ_64K +#else +#define BLK_MAX_BLOCK_SIZE PAGE_SIZE +#endif + + /* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) { - if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + if (bsize < 512 || bsize > BLK_MAX_BLOCK_SIZE || !is_power_of_2(bsize)) return -EINVAL; return 0; @@ -325,15 +337,12 @@ typedef unsigned int __bitwise blk_features_t; /* skip this queue in blk_mq_(un)quiesce_tagset */ #define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) -/* bounce all highmem pages */ -#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14)) - /* undocumented magic for bcache */ #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) -/* stacked device can/does support atomic writes */ -#define BLK_FEAT_ATOMIC_WRITES_STACKED \ +/* atomic writes enabled */ +#define BLK_FEAT_ATOMIC_WRITES \ ((__force blk_features_t)(1u << 16)) /* @@ -341,7 +350,7 @@ typedef unsigned int __bitwise blk_features_t; */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \ + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ @@ -368,6 +377,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_user_sectors; unsigned int max_segment_size; + unsigned int min_segment_size; unsigned int physical_block_size; unsigned int logical_block_size; unsigned int alignment_offset; @@ -398,6 +408,9 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; + unsigned short max_write_streams; + unsigned int write_stream_granularity; + unsigned int max_open_zones; unsigned int max_active_zones; @@ -561,8 +574,22 @@ struct request_queue { struct blk_flush_queue *fq; struct list_head flush_list; + /* + * Protects against I/O scheduler switching, particularly when updating + * q->elevator. Since the elevator update code path may also modify q-> + * nr_requests and wbt latency, this lock also protects the sysfs attrs + * nr_requests and wbt_lat_usec. Additionally the nr_hw_queues update + * may modify hctx tags, reserved-tags and cpumask, so this lock also + * helps protect the hctx sysfs/debugfs attrs. To ensure proper locking + * order during an elevator or nr_hw_queue update, first freeze the + * queue, then acquire ->elevator_lock. + */ + struct mutex elevator_lock; + struct mutex sysfs_lock; - struct mutex sysfs_dir_lock; + /* + * Protects queue limits and also sysfs attribute read_ahead_kb. + */ struct mutex limits_lock; /* @@ -582,6 +609,12 @@ struct request_queue { #ifdef CONFIG_LOCKDEP struct task_struct *mq_freeze_owner; int mq_freeze_owner_depth; + /* + * Records disk & queue state in current context, used in unfreeze + * queue + */ + bool mq_freeze_disk_dead; + bool mq_freeze_queue_dying; #endif wait_queue_head_t mq_freeze_wq; /* @@ -600,8 +633,6 @@ struct request_queue { * Serializes all debugfs metadata operations using the above dentries. */ struct mutex debugfs_mutex; - - bool mq_sysfs_init_done; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ @@ -619,6 +650,8 @@ enum { QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */ QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ + QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ + QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ QUEUE_FLAG_MAX }; @@ -654,6 +687,10 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) #define blk_queue_skip_tagset_quiesce(q) \ ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) +#define blk_queue_disable_wbt(q) \ + test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags) +#define blk_queue_no_elv_switch(q) \ + test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); @@ -687,23 +724,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) (q->limits.features & BLK_FEAT_ZONED); } -#ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return disk->nr_zones; -} -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { if (!blk_queue_is_zoned(disk->queue)) @@ -711,11 +731,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return disk_nr_zones(bdev->bd_disk); -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -822,6 +837,106 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) (sb->s_blocksize_bits - SECTOR_SHIFT); } +#ifdef CONFIG_BLK_DEV_ZONED +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return disk->nr_zones; +} + +/** + * bio_needs_zone_write_plugging - Check if a BIO needs to be handled with zone + * write plugging + * @bio: The BIO being submitted + * + * Return true whenever @bio execution needs to be handled through zone + * write plugging (using blk_zone_plug_bio()). Return false otherwise. + */ +static inline bool bio_needs_zone_write_plugging(struct bio *bio) +{ + enum req_op op = bio_op(bio); + + /* + * Only zoned block devices have a zone write plug hash table. But not + * all of them have one (e.g. DM devices may not need one). + */ + if (!bio->bi_bdev->bd_disk->zone_wplugs_hash) + return false; + + /* Only write operations need zone write plugging. */ + if (!op_is_write(op)) + return false; + + /* Ignore empty flush */ + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return false; + + /* Ignore BIOs that already have been handled by zone write plugging. */ + if (bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING)) + return false; + + /* + * All zone write operations must be handled through zone write plugging + * using blk_zone_plug_bio(). + */ + switch (op) { + case REQ_OP_ZONE_APPEND: + case REQ_OP_WRITE: + case REQ_OP_WRITE_ZEROES: + case REQ_OP_ZONE_FINISH: + case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_RESET_ALL: + return true; + default: + return false; + } +} + +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); + +/** + * disk_zone_capacity - returns the zone capacity of zone containing @sector + * @disk: disk to work with + * @sector: sector number within the querying zone + * + * Returns the zone capacity of a zone containing @sector. @sector can be any + * sector in the zone. + */ +static inline unsigned int disk_zone_capacity(struct gendisk *disk, + sector_t sector) +{ + sector_t zone_sectors = disk->queue->limits.chunk_sectors; + + if (sector + zone_sectors >= get_capacity(disk)) + return disk->last_zone_capacity; + return disk->zone_capacity; +} +static inline unsigned int bdev_zone_capacity(struct block_device *bdev, + sector_t pos) +{ + return disk_zone_capacity(bdev->bd_disk, pos); +} +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} + +static inline bool bio_needs_zone_write_plugging(struct bio *bio) +{ + return false; +} + +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_ZONED */ + +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + int bdev_disk_changed(struct gendisk *disk, bool invalidate); void put_disk(struct gendisk *disk); @@ -939,8 +1054,7 @@ static inline unsigned int blk_boundary_sectors_left(sector_t offset, * the caller can modify. The caller must call queue_limits_commit_update() * to finish the update. * - * Context: process context. The caller must have frozen the queue or ensured - * that there is outstanding I/O by other means. + * Context: process context. */ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) @@ -1241,6 +1355,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev) return queue_max_segments(bdev_get_queue(bdev)); } +static inline unsigned short bdev_max_write_streams(struct block_device *bdev) +{ + if (bdev_is_partition(bdev)) + return 0; + return bdev_limits(bdev)->max_write_streams; +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { return q->limits.logical_block_size; @@ -1395,6 +1516,13 @@ static inline bool bdev_is_zone_start(struct block_device *bdev, return bdev_offset_from_zone_start(bdev, sector) == 0; } +/* Check whether @sector is a multiple of the zone size. */ +static inline bool bdev_is_zone_aligned(struct block_device *bdev, + sector_t sector) +{ + return bdev_is_zone_start(bdev, sector); +} + /** * bdev_zone_is_seq - check if a sector belongs to a sequential write zone * @bdev: block device to check @@ -1590,6 +1718,7 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) return bio_end_io_acct_remapped(bio, start_time, bio->bi_bdev); } +int bdev_validate_blocksize(struct block_device *bdev, int block_size); int set_blocksize(struct file *file, int size); int lookup_bdev(const char *pathname, dev_t *dev); @@ -1646,10 +1775,6 @@ int bd_prepare_to_claim(struct block_device *bdev, void *holder, const struct blk_holder_ops *hops); void bd_abort_claiming(struct block_device *bdev, void *holder); -/* just for blk-cgroup, don't use elsewhere */ -struct block_device *blkdev_get_no_open(dev_t dev); -void blkdev_put_no_open(struct block_device *bdev); - struct block_device *I_BDEV(struct inode *inode); struct block_device *file_bdev(struct file *bdev_file); bool disk_live(struct gendisk *disk); @@ -1661,7 +1786,7 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx(struct path *, struct kstat *, u32); +void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1679,8 +1804,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx(struct path *path, struct kstat *stat, - u32 request_mask) +static inline void bdev_statx(const struct path *path, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) diff --git a/include/linux/bootmem_info.h b/include/linux/bootmem_info.h index d8a8d245824a..4c506e76a808 100644 --- a/include/linux/bootmem_info.h +++ b/include/linux/bootmem_info.h @@ -18,6 +18,8 @@ enum bootmem_type { #ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE void __init register_page_bootmem_info_node(struct pglist_data *pgdat); +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, + unsigned long nr_pages); void get_page_bootmem(unsigned long info, struct page *page, enum bootmem_type type); @@ -58,6 +60,11 @@ static inline void register_page_bootmem_info_node(struct pglist_data *pgdat) { } +static inline void register_page_bootmem_memmap(unsigned long section_nr, + struct page *map, unsigned long nr_pages) +{ +} + static inline void put_page_bootmem(struct page *page) { } diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 7fc69083e745..501873758ce6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -77,9 +77,6 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype]) -#define for_each_cgroup_storage_type(stype) \ - for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) - struct bpf_cgroup_storage_map; struct bpf_storage_buffer { @@ -111,10 +108,10 @@ struct bpf_prog_list { struct bpf_prog *prog; struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; + u32 flags; }; -int cgroup_bpf_inherit(struct cgroup *cgrp); -void cgroup_bpf_offline(struct cgroup *cgrp); +void __init cgroup_bpf_lifetime_notifier_init(void); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -426,12 +423,12 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, const struct bpf_func_proto * cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); -const struct bpf_func_proto * -cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); #else -static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } -static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} +static inline void cgroup_bpf_lifetime_notifier_init(void) +{ + return; +} static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, @@ -464,12 +461,6 @@ cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return NULL; } -static inline const struct bpf_func_proto * -cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) -{ - return NULL; -} - static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( @@ -517,8 +508,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) -#define for_each_cgroup_storage_type(stype) for (; false; ) - #endif /* CONFIG_CGROUP_BPF */ #endif /* _BPF_CGROUP_H */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6e63dd3443b9..bcae876a2a60 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -30,6 +30,7 @@ #include <linux/static_call.h> #include <linux/memcontrol.h> #include <linux/cfi.h> +#include <asm/rqspinlock.h> struct bpf_verifier_env; struct bpf_verifier_log; @@ -204,8 +205,23 @@ enum btf_field_type { BPF_REFCOUNT = (1 << 9), BPF_WORKQUEUE = (1 << 10), BPF_UPTR = (1 << 11), + BPF_RES_SPIN_LOCK = (1 << 12), }; +enum bpf_cgroup_storage_type { + BPF_CGROUP_STORAGE_SHARED, + BPF_CGROUP_STORAGE_PERCPU, + __BPF_CGROUP_STORAGE_MAX +#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX +}; + +#ifdef CONFIG_CGROUP_BPF +# define for_each_cgroup_storage_type(stype) \ + for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) +#else +# define for_each_cgroup_storage_type(stype) for (; false; ) +#endif /* CONFIG_CGROUP_BPF */ + typedef void (*btf_dtor_kfunc_t)(void *); struct btf_field_kptr { @@ -239,6 +255,7 @@ struct btf_record { u32 cnt; u32 field_mask; int spin_lock_off; + int res_spin_lock_off; int timer_off; int wq_off; int refcount_off; @@ -257,6 +274,19 @@ struct bpf_list_node_kern { void *owner; } __attribute__((aligned(8))); +/* 'Ownership' of program-containing map is claimed by the first program + * that is going to use this map or by the first program which FD is + * stored in the map to make sure that all callers and callees have the + * same prog type, JITed flag and xdp_has_frags flag. + */ +struct bpf_map_owner { + enum bpf_prog_type type; + bool jited; + bool xdp_has_frags; + u64 storage_cookie[MAX_BPF_CGROUP_STORAGE_TYPE]; + const struct btf_type *attach_func_proto; +}; + struct bpf_map { const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; @@ -289,24 +319,15 @@ struct bpf_map { struct rcu_head rcu; }; atomic64_t writecnt; - /* 'Ownership' of program-containing map is claimed by the first program - * that is going to use this map or by the first program which FD is - * stored in the map to make sure that all callers and callees have the - * same prog type, JITed flag and xdp_has_frags flag. - */ - struct { - const struct btf_type *attach_func_proto; - spinlock_t lock; - enum bpf_prog_type type; - bool jited; - bool xdp_has_frags; - } owner; + spinlock_t owner_lock; + struct bpf_map_owner *owner; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; bool free_after_rcu_gp; atomic64_t sleepable_refcnt; s64 __percpu *elem_count; + u64 cookie; /* write-once */ }; static inline const char *btf_field_type_name(enum btf_field_type type) @@ -314,6 +335,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) switch (type) { case BPF_SPIN_LOCK: return "bpf_spin_lock"; + case BPF_RES_SPIN_LOCK: + return "bpf_res_spin_lock"; case BPF_TIMER: return "bpf_timer"; case BPF_WORKQUEUE: @@ -341,11 +364,19 @@ static inline const char *btf_field_type_name(enum btf_field_type type) } } +#if IS_ENABLED(CONFIG_DEBUG_KERNEL) +#define BPF_WARN_ONCE(cond, format...) WARN_ONCE(cond, format) +#else +#define BPF_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) +#endif + static inline u32 btf_field_type_size(enum btf_field_type type) { switch (type) { case BPF_SPIN_LOCK: return sizeof(struct bpf_spin_lock); + case BPF_RES_SPIN_LOCK: + return sizeof(struct bpf_res_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); case BPF_WORKQUEUE: @@ -376,6 +407,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) switch (type) { case BPF_SPIN_LOCK: return __alignof__(struct bpf_spin_lock); + case BPF_RES_SPIN_LOCK: + return __alignof__(struct bpf_res_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); case BPF_WORKQUEUE: @@ -419,6 +452,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_RB_ROOT: /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: case BPF_TIMER: case BPF_WORKQUEUE: case BPF_KPTR_UNREF: @@ -968,6 +1002,7 @@ struct bpf_insn_access_aux { struct { struct btf *btf; u32 btf_id; + u32 ref_obj_id; }; }; struct bpf_verifier_log *log; /* for verbose logs */ @@ -990,6 +1025,21 @@ static inline bool bpf_pseudo_func(const struct bpf_insn *insn) return bpf_is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; } +/* Given a BPF_ATOMIC instruction @atomic_insn, return true if it is an + * atomic load or store, and false if it is a read-modify-write instruction. + */ +static inline bool +bpf_atomic_is_load_store(const struct bpf_insn *atomic_insn) +{ + switch (atomic_insn->imm) { + case BPF_LOAD_ACQ: + case BPF_STORE_REL: + return true; + default: + return false; + } +} + struct bpf_prog_ops { int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); @@ -1050,14 +1100,6 @@ struct bpf_prog_offload { u32 jited_len; }; -enum bpf_cgroup_storage_type { - BPF_CGROUP_STORAGE_SHARED, - BPF_CGROUP_STORAGE_PERCPU, - __BPF_CGROUP_STORAGE_MAX -}; - -#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX - /* The longest tracepoint has 12 args. * See include/trace/bpf_probe.h */ @@ -1323,6 +1365,20 @@ u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, + void *src, u32 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, + void *buffer__opt, u32 buffer__szk); + +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +{ + u32 size = __bpf_dynptr_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, @@ -1481,6 +1537,8 @@ struct bpf_ctx_arg_aux { enum bpf_reg_type reg_type; struct btf *btf; u32 btf_id; + u32 ref_obj_id; + bool refcounted; }; struct btf_mod_pair { @@ -1503,11 +1561,12 @@ struct bpf_prog_aux { u32 real_func_cnt; /* includes hidden progs, only used for JIT and freeing progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ + u32 attach_st_ops_member_off; u32 ctx_arg_info_size; u32 max_rdonly_access; u32 max_rdwr_access; struct btf *attach_btf; - const struct bpf_ctx_arg_aux *ctx_arg_info; + struct bpf_ctx_arg_aux *ctx_arg_info; void __percpu *priv_stack_ptr; struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */ struct bpf_prog *dst_prog; @@ -1528,6 +1587,7 @@ struct bpf_prog_aux { bool jits_use_priv_stack; bool priv_stack_requested; bool changes_pkt_data; + bool might_sleep; u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ struct bpf_arena *arena; @@ -1547,6 +1607,7 @@ struct bpf_prog_aux { #endif struct bpf_ksym ksym; const struct bpf_prog_ops *ops; + const struct bpf_struct_ops *st_ops; struct bpf_map **used_maps; struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ struct btf_mod_pair *used_btfs; @@ -1945,6 +2006,9 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op #endif +int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, + const struct bpf_ctx_arg_aux *info, u32 cnt); + #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM) int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, int cgroup_atype); @@ -1980,6 +2044,7 @@ struct bpf_array { */ enum { BPF_MAX_LOOPS = 8 * 1024 * 1024, + BPF_MAX_TIMED_LOOPS = 0xffff, }; #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ @@ -2016,6 +2081,16 @@ static inline bool bpf_map_flags_access_ok(u32 access_flags) (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); } +static inline struct bpf_map_owner *bpf_map_owner_alloc(struct bpf_map *map) +{ + return kzalloc(sizeof(*map->owner), GFP_ATOMIC); +} + +static inline void bpf_map_owner_free(struct bpf_map *map) +{ + kfree(map->owner); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; @@ -2036,6 +2111,8 @@ int bpf_prog_calc_tag(struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void); +const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void); + typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, unsigned long off, unsigned long len); typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type, @@ -2299,6 +2376,14 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); +/* + * The __bpf_map_get() and __btf_get_by_fd() functions parse a file + * descriptor and return a corresponding map or btf object. + * Their names are double underscored to emphasize the fact that they + * do not increase refcnt. To also increase refcnt use corresponding + * bpf_map_get() and btf_get_by_fd() functions. + */ + static inline struct bpf_map *__bpf_map_get(struct fd f) { if (fd_empty(f)) @@ -2308,6 +2393,15 @@ static inline struct bpf_map *__bpf_map_get(struct fd f) return fd_file(f)->private_data; } +static inline struct btf *__btf_get_by_fd(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &btf_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); @@ -2331,7 +2425,7 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); -int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, +int bpf_map_alloc_pages(const struct bpf_map *map, int nid, unsigned long nr_pages, struct page **page_array); #ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, @@ -2529,7 +2623,7 @@ struct bpf_iter__bpf_map_elem { int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); -bool bpf_iter_prog_supported(struct bpf_prog *prog); +int bpf_iter_prog_supported(struct bpf_prog *prog); const struct bpf_func_proto * bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog); @@ -2589,10 +2683,10 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog); + const struct bpf_prog *xdp_prog); int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress); + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress); void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, @@ -2862,15 +2956,15 @@ struct sk_buff; static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { return 0; } static inline int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress) + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress) { return 0; } diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index aefcd6564251..643809cc78c3 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -48,6 +48,11 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func) int bpf_lsm_get_retval_range(const struct bpf_prog *prog, struct bpf_retval_range *range); +int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, + const struct bpf_dynptr *value_p, int flags); +int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str); +bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog); + #else /* !CONFIG_BPF_LSM */ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) @@ -86,6 +91,19 @@ static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog, { return -EOPNOTSUPP; } +static inline int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__str, + const struct bpf_dynptr *value_p, int flags) +{ + return -EOPNOTSUPP; +} +static inline int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name__str) +{ + return -EOPNOTSUPP; +} +static inline bool bpf_lsm_has_d_inode_locked(const struct bpf_prog *prog) +{ + return false; +} #endif /* CONFIG_BPF_LSM */ #endif /* _LINUX_BPF_LSM_H */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 48b7b2eeb7e2..256274acb1d8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,6 +115,14 @@ struct bpf_reg_state { int depth:30; } iter; + /* For irq stack slots */ + struct { + enum { + IRQ_NATIVE_KFUNC, + IRQ_LOCK_KFUNC, + } kfunc_class; + } irq; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -233,6 +241,7 @@ enum bpf_stack_slot_type { */ STACK_DYNPTR, STACK_ITER, + STACK_IRQ_FLAG, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ @@ -254,8 +263,12 @@ struct bpf_reference_state { * default to pointer reference on zero initialization of a state. */ enum ref_state_type { - REF_TYPE_PTR = 0, - REF_TYPE_LOCK, + REF_TYPE_PTR = (1 << 1), + REF_TYPE_IRQ = (1 << 2), + REF_TYPE_LOCK = (1 << 3), + REF_TYPE_RES_LOCK = (1 << 4), + REF_TYPE_RES_LOCK_IRQ = (1 << 5), + REF_TYPE_LOCK_MASK = REF_TYPE_LOCK | REF_TYPE_RES_LOCK | REF_TYPE_RES_LOCK_IRQ, } type; /* Track each reference created with a unique id, even if the same * instruction creates the reference multiple times (eg, via CALL). @@ -315,9 +328,6 @@ struct bpf_func_state { u32 callback_depth; /* The following fields should be last. See copy_func_state() */ - int acquired_refs; - int active_locks; - struct bpf_reference_state *refs; /* The state of the stack. Each element of the array describes BPF_REG_SIZE * (i.e. 8) bytes worth of stack memory. * stack[0] represents bytes [*(r10-8)..*(r10-1)] @@ -346,7 +356,11 @@ enum { INSN_F_SPI_MASK = 0x3f, /* 6 bits */ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */ - INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */ + INSN_F_STACK_ACCESS = BIT(9), + + INSN_F_DST_REG_STACK = BIT(10), /* dst_reg is PTR_TO_STACK */ + INSN_F_SRC_REG_STACK = BIT(11), /* src_reg is PTR_TO_STACK */ + /* total 12 bits are used now. */ }; static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES); @@ -355,9 +369,9 @@ static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8); struct bpf_insn_hist_entry { u32 idx; /* insn idx can't be bigger than 1 million */ - u32 prev_idx : 22; - /* special flags, e.g., whether insn is doing register stack spill/load */ - u32 flags : 10; + u32 prev_idx : 20; + /* special INSN_F_xxx flags */ + u32 flags : 12; /* additional registers that need precision tracking when this * jump is backtracked, vector of six 10-bit records */ @@ -370,6 +384,8 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; struct bpf_verifier_state *parent; + /* Acquired reference states */ + struct bpf_reference_state *refs; /* * 'branches' field is the number of branches left to explore: * 0 - all possible paths from this state reached bpf_exit or @@ -419,14 +435,15 @@ struct bpf_verifier_state { u32 insn_idx; u32 curframe; - bool speculative; + u32 acquired_refs; + u32 active_locks; + u32 active_preempt_locks; + u32 active_irq_id; + u32 active_lock_id; + void *active_lock_ptr; bool active_rcu_lock; - u32 active_preempt_lock; - /* If this state was ever pointed-to by other state's loop_entry field - * this flag would be set to true. Used to avoid freeing such states - * while they are still in use. - */ - bool used_as_loop_entry; + + bool speculative; bool in_sleepable; /* first and last insn idx of this verifier state */ @@ -453,6 +470,11 @@ struct bpf_verifier_state { u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + u32 used_as_loop_entry; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -493,8 +515,10 @@ struct bpf_verifier_state { /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; - struct bpf_verifier_state_list *next; - int miss_cnt, hit_cnt; + struct list_head node; + u32 miss_cnt; + u32 hit_cnt:31; + u32 in_free_list:1; }; struct bpf_loop_inline_state { @@ -571,6 +595,7 @@ struct bpf_insn_aux_data { * bpf_fastcall pattern. */ u8 fastcall_spills_num:3; + u8 arg_prog:4; /* below fields are initialized once */ unsigned int orig_idx; /* original instruction index */ @@ -584,6 +609,8 @@ struct bpf_insn_aux_data { * accepts callback function as a parameter. */ bool calls_callback; + /* registers alive before this instruction. */ + u16 live_regs_before; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ @@ -660,6 +687,7 @@ struct bpf_subprog_info { /* true if bpf_fastcall stack region is used by functions that can't be inlined */ bool keep_fastcall_stack: 1; bool changes_pkt_data: 1; + bool might_sleep: 1; enum priv_stack_mode priv_stack_mode; u8 arg_cnt; @@ -705,8 +733,11 @@ struct bpf_verifier_env { bool test_state_freq; /* test verifier with different pruning frequency */ bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ - struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_verifier_state_list *free_list; + /* Search pruning optimization, array of list_heads for + * lists of struct bpf_verifier_state_list. + */ + struct list_head *explored_states; + struct list_head free_list; /* list of struct bpf_verifier_state_list */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ u32 used_map_cnt; /* number of used maps */ @@ -737,7 +768,11 @@ struct bpf_verifier_env { struct { int *insn_state; int *insn_stack; + /* vector of instruction indexes sorted in post-order */ + int *insn_postorder; int cur_stack; + /* current position in the insn_postorder vector */ + int cur_postorder; } cfg; struct backtrack_state bt; struct bpf_insn_hist_entry *insn_hist; @@ -762,6 +797,8 @@ struct bpf_verifier_env { u32 peak_states; /* longest register parentage chain walked for liveness marking */ u32 longest_mark_read_walk; + u32 free_list_size; + u32 explored_states_size; bpfptr_t fd_array; /* bit mask to keep track of whether a register has been accessed @@ -806,6 +843,17 @@ __printf(3, 4) void verbose_linfo(struct bpf_verifier_env *env, u32 insn_off, const char *prefix_fmt, ...); +#define verifier_bug_if(cond, env, fmt, args...) \ + ({ \ + bool __cond = (cond); \ + if (unlikely(__cond)) { \ + BPF_WARN_ONCE(1, "verifier bug: " fmt "(" #cond ")\n", ##args); \ + bpf_log(&env->log, "verifier bug: " fmt "(" #cond ")\n", ##args); \ + } \ + (__cond); \ + }) +#define verifier_bug(env, fmt, args...) verifier_bug_if(1, env, fmt, ##args) + static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; @@ -980,8 +1028,9 @@ const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); const char *iter_state_str(enum bpf_iter_state state); -void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state, bool print_all); -void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/btf.h b/include/linux/btf.h index 2a08a2b55592..b2983706292f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -76,6 +76,9 @@ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ #define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ +#define KF_ARENA_RET (1 << 13) /* kfunc returns an arena pointer */ +#define KF_ARENA_ARG1 (1 << 14) /* kfunc takes an arena pointer as its first argument */ +#define KF_ARENA_ARG2 (1 << 15) /* kfunc takes an arena pointer as its second argument */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the @@ -519,6 +522,7 @@ bool btf_param_match_suffix(const struct btf *btf, const char *suffix); int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, u32 arg_no); +u32 btf_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, int off); struct bpf_verifier_log; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 932139c5d46f..0029ff880e27 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -34,6 +34,7 @@ enum bh_state_bits { BH_Meta, /* Buffer contains metadata */ BH_Prio, /* Buffer should be submitted with REQ_PRIO */ BH_Defer_Completion, /* Defer AIO completion to workqueue */ + BH_Migrate, /* Buffer is being migrated (norefs) */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -182,7 +183,6 @@ static inline unsigned long bh_offset(const struct buffer_head *bh) BUG_ON(!PagePrivate(page)); \ ((struct buffer_head *)page_private(page)); \ }) -#define page_has_buffers(page) PagePrivate(page) #define folio_buffers(folio) folio_get_private(folio) void buffer_check_dirty_writeback(struct folio *folio, @@ -223,6 +223,8 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); +struct buffer_head *__find_get_block_nonatomic(struct block_device *bdev, + sector_t block, unsigned size); struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); @@ -271,7 +273,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, struct folio **, void **, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); -void block_commit_write(struct page *page, unsigned int from, unsigned int to); +void block_commit_write(struct folio *folio, size_t from, size_t to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); @@ -398,6 +400,12 @@ sb_find_get_block(struct super_block *sb, sector_t block) return __find_get_block(sb->s_bdev, block, sb->s_blocksize); } +static inline struct buffer_head * +sb_find_get_block_nonatomic(struct super_block *sb, sector_t block) +{ + return __find_get_block_nonatomic(sb->s_bdev, block, sb->s_blocksize); +} + static inline void map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block) { diff --git a/include/linux/bug.h b/include/linux/bug.h index 348acf2558f3..a9948a9f1093 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -73,15 +73,23 @@ static inline void generic_bug_clear_once(void) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef CONFIG_PRINTK +void mem_dump_obj(void *object); +#else +static inline void mem_dump_obj(void *object) {} +#endif + /* * Since detected data corruption should stop operation on the affected * structures. Return value must be checked and sanely acted on by caller. */ static inline __must_check bool check_data_corruption(bool v) { return v; } -#define CHECK_DATA_CORRUPTION(condition, fmt, ...) \ +#define CHECK_DATA_CORRUPTION(condition, addr, fmt, ...) \ check_data_corruption(({ \ bool corruption = unlikely(condition); \ if (corruption) { \ + if (addr) \ + mem_dump_obj(addr); \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \ pr_err(fmt, ##__VA_ARGS__); \ BUG(); \ diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 3aa3640f8c18..2cfbb4c65c78 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include <linux/compiler.h> -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/include/linux/bus/stm32_firewall_device.h b/include/linux/bus/stm32_firewall_device.h index 5178b72bc920..eaa7a3f54450 100644 --- a/include/linux/bus/stm32_firewall_device.h +++ b/include/linux/bus/stm32_firewall_device.h @@ -114,27 +114,30 @@ void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 su #else /* CONFIG_STM32_FIREWALL */ -int stm32_firewall_get_firewall(struct device_node *np, struct stm32_firewall *firewall, - unsigned int nb_firewall) +static inline int stm32_firewall_get_firewall(struct device_node *np, + struct stm32_firewall *firewall, + unsigned int nb_firewall) { return -ENODEV; } -int stm32_firewall_grant_access(struct stm32_firewall *firewall) +static inline int stm32_firewall_grant_access(struct stm32_firewall *firewall) { return -ENODEV; } -void stm32_firewall_release_access(struct stm32_firewall *firewall) +static inline void stm32_firewall_release_access(struct stm32_firewall *firewall) { } -int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id) +static inline int stm32_firewall_grant_access_by_id(struct stm32_firewall *firewall, + u32 subsystem_id) { return -ENODEV; } -void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, u32 subsystem_id) +static inline void stm32_firewall_release_access_by_id(struct stm32_firewall *firewall, + u32 subsystem_id) { } diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f41c7f0ef91e..0a80e1f9aa20 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** @@ -184,6 +187,12 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv, ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) +#define for_each_mp_bvec(bvl, bio_vec, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1); \ + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) + /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ { \ @@ -286,12 +295,7 @@ static inline void *bvec_virt(struct bio_vec *bvec) */ static inline phys_addr_t bvec_phys(const struct bio_vec *bvec) { - /* - * Note this open codes page_to_phys because page_to_phys is defined in - * <asm/io.h>, which we don't want to pull in here. If it ever moves to - * a sensible place we should start using it. - */ - return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset; + return page_to_phys(bvec->bv_page) + bvec->bv_offset; } #endif /* __LINUX_BVEC_H */ diff --git a/include/linux/cache.h b/include/linux/cache.h index ca2a05682a54..e69768f50d53 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -3,16 +3,13 @@ #define __LINUX_CACHE_H #include <uapi/linux/kernel.h> +#include <vdso/cache.h> #include <asm/cache.h> #ifndef L1_CACHE_ALIGN #define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES) #endif -#ifndef SMP_CACHE_BYTES -#define SMP_CACHE_BYTES L1_CACHE_BYTES -#endif - /** * SMP_CACHE_ALIGN - align a value to the L2 cacheline size * @x: value to align @@ -63,10 +60,6 @@ #define __ro_after_init __section(".data..ro_after_init") #endif -#ifndef ____cacheline_aligned -#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) -#endif - #ifndef ____cacheline_aligned_in_smp #ifdef CONFIG_SMP #define ____cacheline_aligned_in_smp ____cacheline_aligned diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 7ad736538649..c8f4f0a0b874 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -147,7 +147,7 @@ static inline int get_cpu_cacheinfo_id(int cpu, int level) return ci ? ci->id : -1; } -#ifdef CONFIG_ARM64 +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) #define use_arch_cache_info() (true) #else #define use_arch_cache_info() (false) diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 6261aa0b3fb0..13cd6469e7e5 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -26,20 +26,41 @@ do { \ __once_init((once), #once, &__key); \ } while (0) -static inline void call_once(struct once *once, void (*cb)(struct once *)) +/* + * call_once - Ensure a function has been called exactly once + * + * @once: Tracking struct + * @cb: Function to be called + * + * If @once has never completed successfully before, call @cb and, if + * it returns a zero or positive value, mark @once as completed. Return + * the value returned by @cb + * + * If @once has completed succesfully before, return 0. + * + * The call to @cb is implicitly surrounded by a mutex, though for + * efficiency the * function avoids taking it after the first call. + */ +static inline int call_once(struct once *once, int (*cb)(struct once *)) { - /* Pairs with atomic_set_release() below. */ - if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) - return; - - guard(mutex)(&once->lock); - WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); - if (atomic_read(&once->state) != ONCE_NOT_STARTED) - return; - - atomic_set(&once->state, ONCE_RUNNING); - cb(once); - atomic_set_release(&once->state, ONCE_COMPLETED); + int r, state; + + /* Pairs with atomic_set_release() below. */ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return 0; + + guard(mutex)(&once->lock); + state = atomic_read(&once->state); + if (unlikely(state != ONCE_NOT_STARTED)) + return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0; + + atomic_set(&once->state, ONCE_RUNNING); + r = cb(once); + if (r < 0) + atomic_set(&once->state, ONCE_NOT_STARTED); + else + atomic_set_release(&once->state, ONCE_COMPLETED); + return r; } #endif /* _LINUX_CALL_ONCE_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 23492213ea35..492d23bec7be 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,6 +38,17 @@ enum can_termination_gpio { CAN_TERMINATION_GPIO_MAX, }; +struct data_bittiming_params { + const struct can_bittiming_const *data_bittiming_const; + struct can_bittiming data_bittiming; + const struct can_tdc_const *tdc_const; + struct can_tdc tdc; + const u32 *data_bitrate_const; + unsigned int data_bitrate_const_cnt; + int (*do_set_data_bittiming)(struct net_device *dev); + int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); +}; + /* * CAN common private data */ @@ -45,16 +56,11 @@ struct can_priv { struct net_device *dev; struct can_device_stats can_stats; - const struct can_bittiming_const *bittiming_const, - *data_bittiming_const; - struct can_bittiming bittiming, data_bittiming; - const struct can_tdc_const *tdc_const; - struct can_tdc tdc; - + const struct can_bittiming_const *bittiming_const; + struct can_bittiming bittiming; + struct data_bittiming_params fd; unsigned int bitrate_const_cnt; const u32 *bitrate_const; - const u32 *data_bitrate_const; - unsigned int data_bitrate_const_cnt; u32 bitrate_max; struct can_clock clock; @@ -77,14 +83,12 @@ struct can_priv { struct delayed_work restart_work; int (*do_set_bittiming)(struct net_device *dev); - int (*do_set_data_bittiming)(struct net_device *dev); int (*do_set_mode)(struct net_device *dev, enum can_mode mode); int (*do_set_termination)(struct net_device *dev, u16 term); int (*do_get_state)(const struct net_device *dev, enum can_state *state); int (*do_get_berr_counter)(const struct net_device *dev, struct can_berr_counter *bec); - int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv); }; static inline bool can_tdc_is_enabled(const struct can_priv *priv) @@ -114,11 +118,11 @@ static inline bool can_tdc_is_enabled(const struct can_priv *priv) */ static inline s32 can_get_relative_tdco(const struct can_priv *priv) { - const struct can_bittiming *dbt = &priv->data_bittiming; + const struct can_bittiming *dbt = &priv->fd.data_bittiming; s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg + dbt->phase_seg1) * dbt->brp; - return (s32)priv->tdc.tdco - sample_point_in_tc; + return (s32)priv->fd.tdc.tdco - sample_point_in_tc; } /* helper to define static CAN controller features at device creation time */ diff --git a/include/linux/capability.h b/include/linux/capability.h index 0c356a517991..1fb08922552c 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -139,7 +139,6 @@ static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, } #ifdef CONFIG_MULTIUSER -extern bool has_capability(struct task_struct *t, int cap); extern bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap); extern bool has_capability_noaudit(struct task_struct *t, int cap); @@ -150,10 +149,6 @@ extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); extern bool ns_capable_setid(struct user_namespace *ns, int cap); #else -static inline bool has_capability(struct task_struct *t, int cap) -{ - return true; -} static inline bool has_ns_capability(struct task_struct *t, struct user_namespace *ns, int cap) { diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index caa4b4430634..0bf7d33a1048 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -82,6 +82,14 @@ enum cc_attr { CC_ATTR_GUEST_SEV_SNP, /** + * @CC_ATTR_GUEST_SNP_SECURE_TSC: SNP Secure TSC is active. + * + * The platform/OS is running as a guest/virtual machine and actively + * using AMD SEV-SNP Secure TSC feature. + */ + CC_ATTR_GUEST_SNP_SECURE_TSC, + + /** * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. * * The host kernel is running with the necessary features diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 2d7d86f0290d..c7f2c63b3bc3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -504,20 +504,6 @@ struct ceph_mds_request_head_legacy { #define CEPH_MDS_REQUEST_HEAD_VERSION 3 -struct ceph_mds_request_head_old { - __le16 version; /* struct version */ - __le64 oldest_client_tid; - __le32 mdsmap_epoch; /* on client */ - __le32 flags; /* CEPH_MDS_FLAG_* */ - __u8 num_retry, num_fwd; /* count retry, fwd attempts */ - __le16 num_releases; /* # include cap/lease release records */ - __le32 op; /* mds op code */ - __le32 caller_uid, caller_gid; - __le64 ino; /* use this ino for openc, mkdir, mknod, - etc. (if replaying) */ - union ceph_mds_request_args_ext args; -} __attribute__ ((packed)); - struct ceph_mds_request_head { __le16 version; /* struct version */ __le64 oldest_client_tid; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d55b30057a45..50b14a5661c7 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -490,9 +490,6 @@ extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *, struct page **pages, u64 length, u32 alignment, bool pages_from_pool, bool own_pages); -extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); #ifdef CONFIG_BLOCK void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, unsigned int which, @@ -509,9 +506,6 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req, unsigned int which, struct iov_iter *iter); -extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *, - unsigned int which, - struct ceph_pagelist *pagelist); extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/include/linux/cfi.h b/include/linux/cfi.h index f0df518e11dd..1db17ecbb86c 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,6 +11,8 @@ #include <linux/module.h> #include <asm/cfi.h> +extern bool cfi_warn; + #ifndef cfi_get_offset static inline int cfi_get_offset(void) { diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 17960a1e858d..e61687d5e496 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -170,6 +170,23 @@ struct cgroup_subsys_state { struct percpu_ref refcnt; /* + * Depending on the context, this field is initialized + * via css_rstat_init() at different places: + * + * when css is associated with cgroup::self + * when css->cgroup is the root cgroup + * performed in cgroup_init() + * when css->cgroup is not the root cgroup + * performed in cgroup_create() + * when css is associated with a subsystem + * when css->cgroup is the root cgroup + * performed in cgroup_init_subsys() in the non-early path + * when css->cgroup is not the root cgroup + * performed in css_create() + */ + struct css_rstat_cpu __percpu *rstat_cpu; + + /* * siblings list anchored at the parent's ->children * * linkage is protected by cgroup_mutex or RCU @@ -177,9 +194,6 @@ struct cgroup_subsys_state { struct list_head sibling; struct list_head children; - /* flush target list anchored at cgrp->rstat_css_list */ - struct list_head rstat_css_node; - /* * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). @@ -219,6 +233,16 @@ struct cgroup_subsys_state { * Protected by cgroup_mutex. */ int nr_descendants; + + /* + * A singly-linked list of css structures to be rstat flushed. + * This is a scratch field to be used exclusively by + * css_rstat_flush(). + * + * Protected by rstat_base_lock when css is cgroup::self. + * Protected by css->ss->rstat_ss_lock otherwise. + */ + struct cgroup_subsys_state *rstat_flush_next; }; /* @@ -329,10 +353,10 @@ struct cgroup_base_stat { /* * rstat - cgroup scalable recursive statistics. Accounting is done - * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the + * per-cpu in css_rstat_cpu which is then lazily propagated up the * hierarchy on reads. * - * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are + * When a stat gets updated, the css_rstat_cpu and its ancestors are * linked into the updated tree. On the following read, propagation only * considers and consumes the updated tree. This makes reading O(the * number of descendants which have been active since last read) instead of @@ -344,10 +368,29 @@ struct cgroup_base_stat { * frequency decreases the cost of each read. * * This struct hosts both the fields which implement the above - - * updated_children and updated_next - and the fields which track basic - * resource statistics on top of it - bsync, bstat and last_bstat. + * updated_children and updated_next. */ -struct cgroup_rstat_cpu { +struct css_rstat_cpu { + /* + * Child cgroups with stat updates on this cpu since the last read + * are linked on the parent's ->updated_children through + * ->updated_next. updated_children is terminated by its container css. + * + * In addition to being more compact, singly-linked list pointing to + * the css makes it unnecessary for each per-cpu struct to point back + * to the associated css. + * + * Protected by per-cpu css->ss->rstat_ss_cpu_lock. + */ + struct cgroup_subsys_state *updated_children; + struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ +}; + +/* + * This struct hosts the fields which track basic resource statistics on + * top of it - bsync, bstat and last_bstat. + */ +struct cgroup_rstat_base_cpu { /* * ->bsync protects ->bstat. These are the only fields which get * updated in the hot path. @@ -374,20 +417,6 @@ struct cgroup_rstat_cpu { * deltas to propagate to the per-cpu subtree_bstat. */ struct cgroup_base_stat last_subtree_bstat; - - /* - * Child cgroups with stat updates on this cpu since the last read - * are linked on the parent's ->updated_children through - * ->updated_next. - * - * In addition to being more compact, singly-linked list pointing - * to the cgroup makes it unnecessary for each per-cpu struct to - * point back to the associated cgroup. - * - * Protected by per-cpu cgroup_rstat_cpu_lock. - */ - struct cgroup *updated_children; /* terminated by self cgroup */ - struct cgroup *updated_next; /* NULL iff not on the list */ }; struct cgroup_freezer_state { @@ -516,23 +545,23 @@ struct cgroup { struct cgroup *dom_cgrp; struct cgroup *old_dom_cgrp; /* used while enabling threaded */ - /* per-cpu recursive resource statistics */ - struct cgroup_rstat_cpu __percpu *rstat_cpu; - struct list_head rstat_css_list; - /* - * Add padding to separate the read mostly rstat_cpu and - * rstat_css_list into a different cacheline from the following - * rstat_flush_next and *bstat fields which can have frequent updates. + * Depending on the context, this field is initialized via + * css_rstat_init() at different places: + * + * when cgroup is the root cgroup + * performed in cgroup_setup_root() + * otherwise + * performed in cgroup_create() */ - CACHELINE_PADDING(_pad_); + struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu; /* - * A singly-linked list of cgroup structures to be rstat flushed. - * This is a scratch field to be used exclusively by - * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock. + * Add padding to keep the read mostly rstat per-cpu pointer on a + * different cacheline than the following *bstat fields which can have + * frequent updates. */ - struct cgroup *rstat_flush_next; + CACHELINE_PADDING(_pad_); /* cgroup basic resource statistics */ struct cgroup_base_stat last_bstat; @@ -619,9 +648,8 @@ struct cgroup_root { */ struct cftype { /* - * By convention, the name should begin with the name of the - * subsystem, followed by a period. Zero length string indicates - * end of cftype array. + * Name of the subsystem is prepended in cgroup_file_name(). + * Zero length string indicates end of cftype array. */ char name[MAX_CFTYPE_NAME]; unsigned long private; @@ -711,6 +739,7 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + void (*css_killed)(struct cgroup_subsys_state *css); void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu); int (*css_extra_stat_show)(struct seq_file *seq, struct cgroup_subsys_state *css); @@ -790,6 +819,9 @@ struct cgroup_subsys { * specifies the mask of subsystems that this one depends on. */ unsigned int depends_on; + + spinlock_t rstat_ss_lock; + raw_spinlock_t __percpu *rstat_ss_cpu_lock; }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f8ef47f8a634..b18fb5fcb38e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -19,6 +19,7 @@ #include <linux/kernfs.h> #include <linux/jump_label.h> #include <linux/types.h> +#include <linux/notifier.h> #include <linux/ns_common.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> @@ -40,7 +41,7 @@ struct kernel_clone_args; #ifdef CONFIG_CGROUPS -enum { +enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ @@ -66,10 +67,16 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +enum cgroup_lifetime_events { + CGROUP_LIFETIME_ONLINE, + CGROUP_LIFETIME_OFFLINE, +}; + extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; extern spinlock_t css_set_lock; +extern struct blocking_notifier_head cgroup_lifetime_notifier; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> @@ -113,6 +120,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); +int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts); int cgroup_rm_cftypes(struct cftype *cfts); void cgroup_file_notify(struct cgroup_file *cfile); void cgroup_file_show(struct cgroup_file *cfile, bool show); @@ -343,7 +351,18 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) */ static inline bool css_is_dying(struct cgroup_subsys_state *css) { - return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); + return css->flags & CSS_DYING; +} + +static inline bool css_is_self(struct cgroup_subsys_state *css) +{ + if (css == &css->cgroup->self) { + /* cgroup::self should not have subsystem association */ + WARN_ON(css->ss != NULL); + return true; + } + + return false; } static inline void cgroup_get(struct cgroup *cgrp) @@ -687,10 +706,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) /* * cgroup scalable recursive statistics. */ -void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); -void cgroup_rstat_flush(struct cgroup *cgrp); -void cgroup_rstat_flush_hold(struct cgroup *cgrp); -void cgroup_rstat_flush_release(struct cgroup *cgrp); +void css_rstat_updated(struct cgroup_subsys_state *css, int cpu); +void css_rstat_flush(struct cgroup_subsys_state *css); /* * Basic resource stats. @@ -786,6 +803,17 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_cgroup_ns(ns); +} + #else /* !CONFIG_CGROUPS */ static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } @@ -796,19 +824,10 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, return old_ns; } -#endif /* !CONFIG_CGROUPS */ +static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } +static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } -static inline void get_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns) - refcount_inc(&ns->ns.count); -} - -static inline void put_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns && refcount_dec_and_test(&ns->ns.count)) - free_cgroup_ns(ns); -} +#endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/cgroup_dmem.h b/include/linux/cgroup_dmem.h new file mode 100644 index 000000000000..dd4869f1d736 --- /dev/null +++ b/include/linux/cgroup_dmem.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _CGROUP_DMEM_H +#define _CGROUP_DMEM_H + +#include <linux/types.h> +#include <linux/llist.h> + +struct dmem_cgroup_pool_state; + +/* Opaque definition of a cgroup region, used internally */ +struct dmem_cgroup_region; + +#if IS_ENABLED(CONFIG_CGROUP_DMEM) +struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *name_fmt, ...) __printf(2,3); +void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region); +int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, + struct dmem_cgroup_pool_state **ret_pool, + struct dmem_cgroup_pool_state **ret_limit_pool); +void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size); +bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool, + struct dmem_cgroup_pool_state *test_pool, + bool ignore_low, bool *ret_hit_low); + +void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool); +#else +static inline __printf(2,3) struct dmem_cgroup_region * +dmem_cgroup_register_region(u64 size, const char *name_fmt, ...) +{ + return NULL; +} + +static inline void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region) +{ } + +static inline int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, + struct dmem_cgroup_pool_state **ret_pool, + struct dmem_cgroup_pool_state **ret_limit_pool) +{ + *ret_pool = NULL; + + if (ret_limit_pool) + *ret_limit_pool = NULL; + + return 0; +} + +static inline void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size) +{ } + +static inline +bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool, + struct dmem_cgroup_pool_state *test_pool, + bool ignore_low, bool *ret_hit_low) +{ + return true; +} + +static inline void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool) +{ } + +#endif +#endif /* _CGROUP_DMEM_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 445235487230..3fd0bcbf3080 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -65,6 +65,10 @@ SUBSYS(rdma) SUBSYS(misc) #endif +#if IS_ENABLED(CONFIG_CGROUP_DMEM) +SUBSYS(dmem) +#endif + /* * The following subsystems are not supported on the default hierarchy. */ diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index ec00e3f7af2b..7093e1d08af0 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -212,10 +212,29 @@ const volatile void * __must_check_fn(const volatile void *val) { return val; } #define no_free_ptr(p) \ - ((typeof(p)) __must_check_fn(__get_and_null(p, NULL))) + ((typeof(p)) __must_check_fn((__force const volatile void *)__get_and_null(p, NULL))) #define return_ptr(p) return no_free_ptr(p) +/* + * Only for situations where an allocation is handed in to another function + * and consumed by that function on success. + * + * struct foo *f __free(kfree) = kzalloc(sizeof(*f), GFP_KERNEL); + * + * setup(f); + * if (some_condition) + * return -EINVAL; + * .... + * ret = bar(f); + * if (!ret) + * retain_and_null_ptr(f); + * return ret; + * + * After retain_and_null_ptr(f) the variable f is NULL and cannot be + * dereferenced anymore. + */ +#define retain_and_null_ptr(p) ((void)__get_and_null(p, NULL)) /* * DEFINE_CLASS(name, type, exit, init, init_args...): @@ -291,11 +310,21 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ #define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond -#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ +#define __DEFINE_GUARD_LOCK_PTR(_name, _exp) \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return (void *)(__force unsigned long)*(_exp); } + +#define DEFINE_CLASS_IS_GUARD(_name) \ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \ + __DEFINE_GUARD_LOCK_PTR(_name, _T) + +#define DEFINE_CLASS_IS_COND_GUARD(_name) \ + __DEFINE_CLASS_IS_CONDITIONAL(_name, true); \ + __DEFINE_GUARD_LOCK_PTR(_name, _T) + +#define DEFINE_GUARD(_name, _type, _lock, _unlock) \ DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ - static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ - { return (void *)(__force unsigned long)*_T; } + DEFINE_CLASS_IS_GUARD(_name) #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ @@ -375,11 +404,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ if (_T->lock) { _unlock; } \ } \ \ -static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ -{ \ - return (void *)(__force unsigned long)_T->lock; \ -} - +__DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) #define __DEFINE_LOCK_GUARD_1(_name, _type, _lock) \ static inline class_##_name##_t class_##_name##_constructor(_type *l) \ diff --git a/include/linux/clk.h b/include/linux/clk.h index 1dcee6d701e4..b607482ca77e 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -1138,15 +1138,6 @@ static inline void clk_restore_context(void) {} #endif -/* Deprecated. Use devm_clk_bulk_get_all_enabled() */ -static inline int __must_check -devm_clk_bulk_get_all_enable(struct device *dev, struct clk_bulk_data **clks) -{ - int ret = devm_clk_bulk_get_all_enabled(dev, clks); - - return ret > 0 ? 0 : ret; -} - /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */ static inline int clk_prepare_enable(struct clk *clk) { diff --git a/include/linux/clk/davinci.h b/include/linux/clk/davinci.h index e1d37451e03f..787a81116b00 100644 --- a/include/linux/clk/davinci.h +++ b/include/linux/clk/davinci.h @@ -12,12 +12,6 @@ #include <linux/regmap.h> /* function for registering clocks in early boot */ - -#ifdef CONFIG_ARCH_DAVINCI_DA830 -int da830_pll_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -#endif -#ifdef CONFIG_ARCH_DAVINCI_DA850 int da850_pll0_init(struct device *dev, void __iomem *base, struct regmap *cfgchip); -#endif #endif /* __LINUX_CLK_DAVINCI_PLL_H___ */ diff --git a/include/linux/cma.h b/include/linux/cma.h index d15b64f51336..62d9c1cf6326 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -40,6 +40,9 @@ static inline int __init cma_declare_contiguous(phys_addr_t base, return cma_declare_contiguous_nid(base, size, limit, alignment, order_per_bit, fixed, name, res_cma, NUMA_NO_NODE); } +extern int __init cma_declare_contiguous_multi(phys_addr_t size, + phys_addr_t align, unsigned int order_per_bit, + const char *name, struct cma **res_cma, int nid); extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, const char *name, @@ -50,12 +53,14 @@ extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count); extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); +extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end); extern void cma_reserve_pages_on_error(struct cma *cma); #ifdef CONFIG_CMA struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp); bool cma_free_folio(struct cma *cma, const struct folio *folio); +bool cma_validate_zones(struct cma *cma); #else static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp) { @@ -66,6 +71,10 @@ static inline bool cma_free_folio(struct cma *cma, const struct folio *folio) { return false; } +static inline bool cma_validate_zones(struct cma *cma) +{ + return false; +} #endif #endif diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d14dbd26b370..5f2b9a1f722c 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,10 +36,10 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct codetag_type *cttype, - struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, - struct codetag_module *cmod); + int (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES void (*module_replaced)(struct module *mod, struct module *new_mod); bool (*needs_section_mem)(struct module *mod, unsigned long size); @@ -89,7 +89,7 @@ void *codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align); void codetag_free_module_sections(struct module *mod); void codetag_module_replaced(struct module *mod, struct module *new_mod); -void codetag_load_module(struct module *mod); +int codetag_load_module(struct module *mod); void codetag_unload_module(struct module *mod); #else /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ @@ -103,7 +103,7 @@ codetag_alloc_module_section(struct module *mod, const char *name, unsigned long align) { return NULL; } static inline void codetag_free_module_sections(struct module *mod) {} static inline void codetag_module_replaced(struct module *mod, struct module *new_mod) {} -static inline void codetag_load_module(struct module *mod) {} +static inline int codetag_load_module(struct module *mod) { return 0; } static inline void codetag_unload_module(struct module *mod) {} #endif /* defined(CONFIG_CODE_TAGGING) && defined(CONFIG_MODULES) */ diff --git a/include/linux/comedi/comedidev.h b/include/linux/comedi/comedidev.h index c08416a7364b..4cb0400ad616 100644 --- a/include/linux/comedi/comedidev.h +++ b/include/linux/comedi/comedidev.h @@ -234,16 +234,12 @@ struct comedi_buf_page { * * A COMEDI data buffer is allocated as individual pages, either in * conventional memory or DMA coherent memory, depending on the attached, - * low-level hardware device. (The buffer pages also get mapped into the - * kernel's contiguous virtual address space pointed to by the 'prealloc_buf' - * member of &struct comedi_async.) + * low-level hardware device. * * The buffer is normally freed when the COMEDI device is detached from the * low-level driver (which may happen due to device removal), but if it happens * to be mmapped at the time, the pages cannot be freed until the buffer has - * been munmapped. That is what the reference counter is for. (The virtual - * address space pointed by 'prealloc_buf' is freed when the COMEDI device is - * detached.) + * been munmapped. That is what the reference counter is for. */ struct comedi_buf_map { struct device *dma_hw_dev; @@ -255,7 +251,6 @@ struct comedi_buf_map { /** * struct comedi_async - Control data for asynchronous COMEDI commands - * @prealloc_buf: Kernel virtual address of allocated acquisition buffer. * @prealloc_bufsz: Buffer size (in bytes). * @buf_map: Map of buffer pages. * @max_bufsize: Maximum allowed buffer size (in bytes). @@ -344,7 +339,6 @@ struct comedi_buf_map { * less than or equal to UINT_MAX). */ struct comedi_async { - void *prealloc_buf; unsigned int prealloc_bufsz; struct comedi_buf_map *buf_map; unsigned int max_bufsize; diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e94776496049..173d9c07a895 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -80,6 +80,11 @@ static inline unsigned long compact_gap(unsigned int order) return 2UL << order; } +static inline int current_is_kcompactd(void) +{ + return current->flags & PF_KCOMPACTD; +} + #ifdef CONFIG_COMPACTION extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); @@ -90,7 +95,7 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, struct page **page); extern void reset_isolation_suitable(pg_data_t *pgdat); extern bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx); + unsigned long watermark, int highest_zoneidx); extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); @@ -108,7 +113,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat) } static inline bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx) + unsigned long watermark, + int highest_zoneidx) { return false; } diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 2e7c2c282f3a..4fc8e26914ad 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -128,3 +128,11 @@ */ #define ASM_INPUT_G "ir" #define ASM_INPUT_RM "r" + +/* + * Declare compiler support for __typeof_unqual__() operator. + * + * Bindgen uses LLVM even if our C compiler is GCC, so we cannot + * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. + */ +#define CC_HAS_TYPEOF_UNQUAL (__clang_major__ >= 19) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index c9b58188ec61..32048052c64a 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -137,3 +137,11 @@ #if GCC_VERSION < 90100 #undef __alloc_size__ #endif + +/* + * Declare compiler support for __typeof_unqual__() operator. + * + * Bindgen uses LLVM even if our C compiler is GCC, so we cannot + * rely on the auto-detected CONFIG_CC_HAS_TYPEOF_UNQUAL. + */ +#define CC_HAS_TYPEOF_UNQUAL (__GNUC__ >= 14) diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h index 573fa85b6c0c..ac1665a98a15 100644 --- a/include/linux/compiler-version.h +++ b/include/linux/compiler-version.h @@ -12,3 +12,33 @@ * and add dependency on include/config/CC_VERSION_TEXT, which is touched * by Kconfig when the version string from the compiler changes. */ + +/* Additional tree-wide dependencies start here. */ + +/* + * If any of the GCC plugins change, we need to rebuild everything that + * was built with them, as they may have changed their behavior and those + * behaviors may need to be synchronized across all translation units. + */ +#ifdef GCC_PLUGINS +#include <generated/gcc-plugins.h> +#endif + +/* + * If the randstruct seed itself changes (whether for GCC plugins or + * Clang), the entire tree needs to be rebuilt since the randomization of + * structures may change between compilation units if not. + */ +#ifdef RANDSTRUCT +#include <generated/randstruct_hash.h> +#endif + +/* + * If any external changes affect Clang's integer wrapping sanitizer + * behavior, a full rebuild is needed as the coverage for wrapping types + * may have changed, which may impact the expected behaviors that should + * not differ between compilation units. + */ +#ifdef INTEGER_WRAP +#include <generated/integer-wrap.h> +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d004f9b5528d..6f04a1d8c720 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -192,20 +192,72 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #ifdef __CHECKER__ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) (0) #else /* __CHECKER__ */ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) #endif /* __CHECKER__ */ /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") +#define __is_array(a) (!__same_type((a), &(a)[0])) +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \ + "must be array") + +#define __is_byte_array(a) (__is_array(a) && sizeof((a)[0]) == 1) +#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \ + "must be byte array") + +/* + * If the "nonstring" attribute isn't available, we have to return true + * so the __must_*() checks pass when "nonstring" isn't supported. + */ +#if __has_attribute(__nonstring__) && defined(__annotated) +#define __is_cstr(a) (!__annotated(a, nonstring)) +#define __is_noncstr(a) (__annotated(a, nonstring)) +#else +#define __is_cstr(a) (true) +#define __is_noncstr(a) (true) +#endif /* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ #define __must_be_cstr(p) \ - __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") + __BUILD_BUG_ON_ZERO_MSG(!__is_cstr(p), \ + "must be C-string (NUL-terminated)") +#define __must_be_noncstr(p) \ + __BUILD_BUG_ON_ZERO_MSG(!__is_noncstr(p), \ + "must be non-C-string (not NUL-terminated)") + +/* + * Use __typeof_unqual__() when available. + * + * XXX: Remove test for __CHECKER__ once + * sparse learns about __typeof_unqual__(). + */ +#if CC_HAS_TYPEOF_UNQUAL && !defined(__CHECKER__) +# define USE_TYPEOF_UNQUAL 1 +#endif + +/* + * Define TYPEOF_UNQUAL() to use __typeof_unqual__() as typeof + * operator when available, to return an unqualified type of the exp. + */ +#if defined(USE_TYPEOF_UNQUAL) +# define TYPEOF_UNQUAL(exp) __typeof_unqual__(exp) +#else +# define TYPEOF_UNQUAL(exp) __typeof__(exp) +#endif #endif /* __KERNEL__ */ +#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +/* + * Force a reference to the external symbol so the compiler generates + * __kcfi_typid. + */ +#define KCFI_REFERENCE(sym) __ADDRESSABLE(sym) +#else +#define KCFI_REFERENCE(sym) +#endif + /** * offset_to_ptr - convert a relative memory offset to an absolute pointer * @off: the address of the 32-bit offset value @@ -308,6 +360,28 @@ static inline void *offset_to_ptr(const int *off) #define statically_true(x) (__builtin_constant_p(x) && (x)) /* + * Similar to statically_true() but produces a constant expression + * + * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(), + * which require their input to be a constant expression and for which + * statically_true() would otherwise fail. + * + * This is a trade-off: const_true() requires all its operands to be + * compile time constants. Else, it would always returns false even on + * the most trivial cases like: + * + * true || non_const_var + * + * On the opposite, statically_true() is able to fold more complex + * tautologies and will return true on expressions such as: + * + * !(non_const_var * 8 % 4) + * + * For the general case, statically_true() is better. + */ +#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false) + +/* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 981cc3d7e3aa..2b77d12e07b2 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -57,7 +57,7 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __user BTF_TYPE_TAG(user) # endif # define __iomem -# define __percpu BTF_TYPE_TAG(percpu) +# define __percpu __percpu_qual BTF_TYPE_TAG(percpu) # define __rcu BTF_TYPE_TAG(rcu) # define __chk_user_ptr(x) (void)0 @@ -349,6 +349,18 @@ struct ftrace_likely_data { #endif /* + * Optional: only supported since gcc >= 15 + * Optional: not supported by Clang + * + * gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117178 + */ +#ifdef CONFIG_CC_HAS_MULTIDIMENSIONAL_NONSTRING +# define __nonstring_array __attribute__((__nonstring__)) +#else +# define __nonstring_array +#endif + +/* * Apply __counted_by() when the Endianness matches to increase test coverage. */ #ifdef __LITTLE_ENDIAN @@ -360,7 +372,7 @@ struct ftrace_likely_data { #endif /* Do not trap wrapping arithmetic within an annotated function. */ -#ifdef CONFIG_UBSAN_SIGNED_WRAP +#ifdef CONFIG_UBSAN_INTEGER_WRAP # define __signed_wrap __attribute__((no_sanitize("signed-integer-overflow"))) #else # define __signed_wrap @@ -437,6 +449,11 @@ struct ftrace_likely_data { /* * When the size of an allocated object is needed, use the best available * mechanism to find it. (For cases where sizeof() cannot be used.) + * + * Optional: only supported since gcc >= 12 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Object-Size-Checking.html + * clang: https://clang.llvm.org/docs/LanguageExtensions.html#evaluating-object-size */ #if __has_builtin(__builtin_dynamic_object_size) #define __struct_size(p) __builtin_dynamic_object_size(p, 0) @@ -446,11 +463,14 @@ struct ftrace_likely_data { #define __member_size(p) __builtin_object_size(p, 1) #endif -/* Determine if an attribute has been applied to a variable. */ +/* + * Determine if an attribute has been applied to a variable. + * Using __annotated needs to check for __annotated being available, + * or negative tests may fail when annotation cannot be checked. For + * example, see the definition of __is_cstr(). + */ #if __has_builtin(__builtin_has_attribute) #define __annotated(var, attr) __builtin_has_attribute(var, attr) -#else -#define __annotated(var, attr) (false) #endif /* @@ -510,6 +530,12 @@ struct ftrace_likely_data { sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #ifdef __OPTIMIZE__ +/* + * #ifdef __OPTIMIZE__ is only a good approximation; for instance "make + * CFLAGS_foo.o=-Og" defines __OPTIMIZE__, does not elide the conditional code + * and can break compilation with wrong error message(s). Combine with + * -U__OPTIMIZE__ when needed. + */ # define __compiletime_assert(condition, msg, prefix, suffix) \ do { \ /* \ @@ -523,7 +549,7 @@ struct ftrace_likely_data { prefix ## suffix(); \ } while (0) #else -# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0) +# define __compiletime_assert(condition, msg, prefix, suffix) ((void)(condition)) #endif #define _compiletime_assert(condition, msg, prefix, suffix) \ diff --git a/include/linux/component.h b/include/linux/component.h index df4aa75c9e7c..9d6c66401280 100644 --- a/include/linux/component.h +++ b/include/linux/component.h @@ -3,7 +3,7 @@ #define COMPONENT_H #include <linux/stddef.h> - +#include <linux/types.h> struct device; @@ -90,6 +90,8 @@ int component_compare_dev_name(struct device *dev, void *data); void component_master_del(struct device *, const struct component_master_ops *); +bool component_master_is_bound(struct device *parent, + const struct component_master_ops *ops); struct component_match; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index c771e9d0d0b9..698520b1bfdb 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -120,15 +120,19 @@ struct configfs_attribute { ssize_t (*store)(struct config_item *, const char *, size_t); }; -#define CONFIGFS_ATTR(_pfx, _name) \ +#define CONFIGFS_ATTR_PERM(_pfx, _name, _perm) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ - .ca_mode = S_IRUGO | S_IWUSR, \ + .ca_mode = _perm, \ .ca_owner = THIS_MODULE, \ .show = _pfx##_name##_show, \ .store = _pfx##_name##_store, \ } +#define CONFIGFS_ATTR(_pfx, _name) CONFIGFS_ATTR_PERM( \ + _pfx, _name, S_IRUGO | S_IWUSR \ +) + #define CONFIGFS_ATTR_RO(_pfx, _name) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ diff --git a/include/linux/console.h b/include/linux/console.h index eba367bf605d..8f10d0a85bb4 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -633,8 +633,8 @@ extern void console_conditional_schedule(void); extern void console_unblank(void); extern void console_flush_on_panic(enum con_flush_mode mode); extern struct tty_driver *console_device(int *); -extern void console_stop(struct console *); -extern void console_start(struct console *); +extern void console_suspend(struct console *); +extern void console_resume(struct console *); extern int is_console_locked(void); extern int braille_register_console(struct console *, int index, char *console_options, char *braille_options); @@ -648,8 +648,8 @@ static inline void console_sysfs_notify(void) extern bool console_suspend_enabled; /* Suspend and resume console messages over PM events */ -extern void suspend_console(void); -extern void resume_console(void); +extern void console_suspend_all(void); +extern void console_resume_all(void); int mda_console_init(void); diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 20f564e98552..59b4fec5f254 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -145,6 +145,7 @@ struct vc_data { unsigned int vc_need_wrap : 1; unsigned int vc_can_do_color : 1; unsigned int vc_report_mouse : 2; + unsigned int vc_bracketed_paste : 1; unsigned char vc_utf : 1; /* Unicode UTF-8 encoding */ unsigned char vc_utf_count; int vc_utf_char; diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index c35db4896c37..6180b803795c 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -28,6 +28,10 @@ int conv_uni_to_pc(struct vc_data *conp, long ucs); u32 conv_8bit_to_uni(unsigned char c); int conv_uni_to_8bit(u32 uni); void console_map_init(void); +bool ucs_is_double_width(uint32_t cp); +bool ucs_is_zero_width(uint32_t cp); +u32 ucs_recompose(u32 base, u32 mark); +u32 ucs_get_fallback(u32 cp); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -57,6 +61,26 @@ static inline int conv_uni_to_8bit(u32 uni) } static inline void console_map_init(void) { } + +static inline bool ucs_is_double_width(uint32_t cp) +{ + return false; +} + +static inline bool ucs_is_zero_width(uint32_t cp) +{ + return false; +} + +static inline u32 ucs_recompose(u32 base, u32 mark) +{ + return 0; +} + +static inline u32 ucs_get_fallback(u32 cp) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ diff --git a/include/linux/context_tracking_irq.h b/include/linux/context_tracking_irq.h index c50b5670c4a5..197916ee91a4 100644 --- a/include/linux/context_tracking_irq.h +++ b/include/linux/context_tracking_irq.h @@ -10,12 +10,12 @@ void ct_irq_exit_irqson(void); void ct_nmi_enter(void); void ct_nmi_exit(void); #else -static inline void ct_irq_enter(void) { } -static inline void ct_irq_exit(void) { } +static __always_inline void ct_irq_enter(void) { } +static __always_inline void ct_irq_exit(void) { } static inline void ct_irq_enter_irqson(void) { } static inline void ct_irq_exit_irqson(void) { } -static inline void ct_nmi_enter(void) { } -static inline void ct_nmi_exit(void) { } +static __always_inline void ct_nmi_enter(void) { } +static __always_inline void ct_nmi_exit(void) { } #endif #endif diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d6..76e41805b92d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit; diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c13342594278..4ac65c68bbf4 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -71,7 +71,8 @@ enum coresight_dev_subtype_source { enum coresight_dev_subtype_helper { CORESIGHT_DEV_SUBTYPE_HELPER_CATU, - CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI + CORESIGHT_DEV_SUBTYPE_HELPER_ECT_CTI, + CORESIGHT_DEV_SUBTYPE_HELPER_CTCU, }; /** @@ -172,6 +173,9 @@ struct coresight_desc { * @dest_dev: a @coresight_device representation of the component connected to @src_port. NULL until the device is created * @link: Representation of the connection as a sysfs link. + * @filter_src_fwnode: filter source component's fwnode handle. + * @filter_src_dev: a @coresight_device representation of the component that + needs to be filtered. * * The full connection structure looks like this, where in_conns store * references to same connection as the source device's out_conns. @@ -200,8 +204,10 @@ struct coresight_connection { struct coresight_device *dest_dev; struct coresight_sysfs_link *link; struct coresight_device *src_dev; - atomic_t src_refcnt; - atomic_t dest_refcnt; + struct fwnode_handle *filter_src_fwnode; + struct coresight_device *filter_src_dev; + int src_refcnt; + int dest_refcnt; }; /** @@ -233,7 +239,7 @@ struct coresight_trace_id_map { DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); atomic_t __percpu *cpu_map; atomic_t perf_cs_etm_session_active; - spinlock_t lock; + raw_spinlock_t lock; }; /** @@ -296,7 +302,7 @@ struct coresight_device { /* system configuration and feature lists */ struct list_head feature_csdev_list; struct list_head config_csdev_list; - spinlock_t cscfg_csdev_lock; + raw_spinlock_t cscfg_csdev_lock; void *active_cscfg_ctxt; }; @@ -324,17 +330,29 @@ static struct coresight_dev_list (var) = { \ #define to_coresight_device(d) container_of(d, struct coresight_device, dev) +/** + * struct coresight_path - data needed by enable/disable path + * @path_list: path from source to sink. + * @trace_id: trace_id of the whole path. + */ +struct coresight_path { + struct list_head path_list; + u8 trace_id; +}; + enum cs_mode { CS_MODE_DISABLED, CS_MODE_SYSFS, CS_MODE_PERF, }; +#define coresight_ops(csdev) csdev->ops #define source_ops(csdev) csdev->ops->source_ops #define sink_ops(csdev) csdev->ops->sink_ops #define link_ops(csdev) csdev->ops->link_ops #define helper_ops(csdev) csdev->ops->helper_ops #define ect_ops(csdev) csdev->ops->ect_ops +#define panic_ops(csdev) csdev->ops->panic_ops /** * struct coresight_ops_sink - basic operations for a sink @@ -380,13 +398,17 @@ struct coresight_ops_link { * is associated to. * @enable: enables tracing for a source. * @disable: disables tracing for a source. + * @resume_perf: resumes tracing for a source in perf session. + * @pause_perf: pauses tracing for a source in perf session. */ struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, - enum cs_mode mode, struct coresight_trace_id_map *id_map); + enum cs_mode mode, struct coresight_path *path); void (*disable)(struct coresight_device *csdev, struct perf_event *event); + int (*resume_perf)(struct coresight_device *csdev); + void (*pause_perf)(struct coresight_device *csdev); }; /** @@ -404,11 +426,24 @@ struct coresight_ops_helper { int (*disable)(struct coresight_device *csdev, void *data); }; + +/** + * struct coresight_ops_panic - Generic device ops for panic handing + * + * @sync : Sync the device register state/trace data + */ +struct coresight_ops_panic { + int (*sync)(struct coresight_device *csdev); +}; + struct coresight_ops { + int (*trace_id)(struct coresight_device *csdev, enum cs_mode mode, + struct coresight_device *sink); const struct coresight_ops_sink *sink_ops; const struct coresight_ops_link *link_ops; const struct coresight_ops_source *source_ops; const struct coresight_ops_helper *helper_ops; + const struct coresight_ops_panic *panic_ops; }; static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa, @@ -454,8 +489,11 @@ static inline struct clk *coresight_get_enable_apb_pclk(struct device *dev) int ret; pclk = clk_get(dev, "apb_pclk"); - if (IS_ERR(pclk)) - return NULL; + if (IS_ERR(pclk)) { + pclk = clk_get(dev, "apb"); + if (IS_ERR(pclk)) + return NULL; + } ret = clk_prepare_enable(pclk); if (ret) { @@ -588,9 +626,14 @@ static inline void csdev_access_write64(struct csdev_access *csa, u64 val, u32 o } #endif /* CONFIG_64BIT */ +static inline bool coresight_is_device_source(struct coresight_device *csdev) +{ + return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE); +} + static inline bool coresight_is_percpu_source(struct coresight_device *csdev) { - return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE) && + return csdev && coresight_is_device_source(csdev) && (csdev->subtype.source_subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_PROC); } @@ -632,23 +675,27 @@ static inline void coresight_set_mode(struct coresight_device *csdev, local_set(&csdev->mode, new_mode); } -extern struct coresight_device * -coresight_register(struct coresight_desc *desc); -extern void coresight_unregister(struct coresight_device *csdev); -extern int coresight_enable_sysfs(struct coresight_device *csdev); -extern void coresight_disable_sysfs(struct coresight_device *csdev); -extern int coresight_timeout(struct csdev_access *csa, u32 offset, - int position, int value); - -extern int coresight_claim_device(struct coresight_device *csdev); -extern int coresight_claim_device_unlocked(struct coresight_device *csdev); - -extern void coresight_disclaim_device(struct coresight_device *csdev); -extern void coresight_disclaim_device_unlocked(struct coresight_device *csdev); -extern char *coresight_alloc_device_name(struct coresight_dev_list *devs, +struct coresight_device *coresight_register(struct coresight_desc *desc); +void coresight_unregister(struct coresight_device *csdev); +int coresight_enable_sysfs(struct coresight_device *csdev); +void coresight_disable_sysfs(struct coresight_device *csdev); +int coresight_timeout(struct csdev_access *csa, u32 offset, int position, int value); +typedef void (*coresight_timeout_cb_t) (struct csdev_access *, u32, int, int); +int coresight_timeout_action(struct csdev_access *csa, u32 offset, int position, int value, + coresight_timeout_cb_t cb); +int coresight_claim_device(struct coresight_device *csdev); +int coresight_claim_device_unlocked(struct coresight_device *csdev); + +int coresight_claim_device(struct coresight_device *csdev); +int coresight_claim_device_unlocked(struct coresight_device *csdev); +void coresight_clear_self_claim_tag(struct csdev_access *csa); +void coresight_clear_self_claim_tag_unlocked(struct csdev_access *csa); +void coresight_disclaim_device(struct coresight_device *csdev); +void coresight_disclaim_device_unlocked(struct coresight_device *csdev); +char *coresight_alloc_device_name(struct coresight_dev_list *devs, struct device *dev); -extern bool coresight_loses_context_with_cpu(struct device *dev); +bool coresight_loses_context_with_cpu(struct device *dev); u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset); u32 coresight_read32(struct coresight_device *csdev, u32 offset); @@ -661,7 +708,8 @@ void coresight_relaxed_write64(struct coresight_device *csdev, u64 val, u32 offset); void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset); -extern int coresight_get_cpu(struct device *dev); +int coresight_get_cpu(struct device *dev); +int coresight_get_static_trace_id(struct device *dev, u32 *id); struct coresight_platform_data *coresight_get_platform_data(struct device *dev); struct coresight_connection * @@ -679,8 +727,10 @@ coresight_find_output_type(struct coresight_platform_data *pdata, union coresight_dev_subtype subtype); int coresight_init_driver(const char *drv, struct amba_driver *amba_drv, - struct platform_driver *pdev_drv); + struct platform_driver *pdev_drv, struct module *owner); void coresight_remove_driver(struct amba_driver *amba_drv, struct platform_driver *pdev_drv); +int coresight_etm_get_trace_id(struct coresight_device *csdev, enum cs_mode mode, + struct coresight_device *sink); #endif /* _LINUX_COREISGHT_H */ diff --git a/include/linux/counter.h b/include/linux/counter.h index 426b7d58a438..f208e867dd0f 100644 --- a/include/linux/counter.h +++ b/include/linux/counter.h @@ -580,6 +580,9 @@ struct counter_array { #define COUNTER_COMP_CEILING(_read, _write) \ COUNTER_COMP_COUNT_U64("ceiling", _read, _write) +#define COUNTER_COMP_COMPARE(_read, _write) \ + COUNTER_COMP_COUNT_U64("compare", _read, _write) + #define COUNTER_COMP_COUNT_MODE(_read, _write, _available) \ { \ .type = COUNTER_COMP_COUNT_MODE, \ diff --git a/include/linux/cper.h b/include/linux/cper.h index 265b0f8fc0b3..0ed60a91eca9 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -89,6 +89,10 @@ enum { #define CPER_NOTIFY_DMAR \ GUID_INIT(0x667DD791, 0xC6B3, 0x4c27, 0x8A, 0x6B, 0x0F, 0x8E, \ 0x72, 0x2D, 0xEB, 0x41) +/* CXL Protocol Error Section */ +#define CPER_SEC_CXL_PROT_ERR \ + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \ + 0x4B, 0x77, 0x10, 0x48) /* CXL Event record UUIDs are formatted as GUIDs and reported in section type */ /* @@ -601,4 +605,8 @@ void cper_estatus_print(const char *pfx, int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); int cper_estatus_check(const struct acpi_hest_generic_status *estatus); +struct cxl_cper_sec_prot_err; +void cxl_cper_print_prot_err(const char *pfx, + const struct cxl_cper_sec_prot_err *prot_err); + #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index bdcec1732445..6378370a952f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -77,6 +77,12 @@ extern ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_old_microcode(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_indirect_target_selection(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, @@ -115,6 +121,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -129,6 +136,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; @@ -147,7 +156,7 @@ static inline int suspend_disable_secondary_cpus(void) } static inline void suspend_enable_secondary_cpus(void) { - return thaw_secondary_cpus(); + thaw_secondary_cpus(); } #else /* !CONFIG_PM_SLEEP_SMP */ diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index 20b5729903d7..2fd7ba75362a 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -32,6 +32,7 @@ struct cpu_rmap { #define CPU_RMAP_DIST_INF 0xffff extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags); +extern void cpu_rmap_get(struct cpu_rmap *rmap); extern int cpu_rmap_put(struct cpu_rmap *rmap); extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7fe0981a7e46..95f3807c8c55 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -144,6 +144,9 @@ struct cpufreq_policy { /* Per policy boost enabled flag. */ bool boost_enabled; + /* Per policy boost supported flag. */ + bool boost_supported; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; unsigned int cached_resolved_idx; @@ -167,6 +170,12 @@ struct cpufreq_policy { struct notifier_block nb_max; }; +DEFINE_GUARD(cpufreq_policy_write, struct cpufreq_policy *, + down_write(&_T->rwsem), up_write(&_T->rwsem)) + +DEFINE_GUARD(cpufreq_policy_read, struct cpufreq_policy *, + down_read(&_T->rwsem), up_read(&_T->rwsem)) + /* * Used for passing new cpufreq policy data to the cpufreq driver's ->verify() * callback for sanitization. That callback is only expected to modify the min @@ -210,6 +219,9 @@ static inline struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) static inline void cpufreq_cpu_put(struct cpufreq_policy *policy) { } #endif +/* Scope based cleanup macro for cpufreq_policy kobject reference counting */ +DEFINE_FREE(put_cpufreq_policy, struct cpufreq_policy *, if (_T) cpufreq_cpu_put(_T)) + static inline bool policy_is_inactive(struct cpufreq_policy *policy) { return cpumask_empty(policy->cpus); @@ -229,9 +241,6 @@ void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); -struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); -void cpufreq_cpu_release(struct cpufreq_policy *policy); -int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); @@ -389,7 +398,7 @@ struct cpufreq_driver { unsigned int (*get)(unsigned int cpu); /* Called to update policy limits on firmware notifications. */ - void (*update_limits)(unsigned int cpu); + void (*update_limits)(struct cpufreq_policy *policy); /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); @@ -641,6 +650,15 @@ module_exit(__governor##_exit) struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL +bool sugov_is_governor(struct cpufreq_policy *policy); +#else +static inline bool sugov_is_governor(struct cpufreq_policy *policy) +{ + return false; +} +#endif + static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy) { if (policy->max < policy->cur) @@ -770,18 +788,16 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation); + unsigned int target_freq, unsigned int min, + unsigned int max, unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf); #ifdef CONFIG_CPU_FREQ -int cpufreq_boost_trigger_state(int state); bool cpufreq_boost_enabled(void); -int cpufreq_enable_boost_support(void); -bool policy_has_boost_freq(struct cpufreq_policy *policy); +int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state); /* Find lowest freq at or above target in a table in ascending order */ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy, @@ -836,12 +852,12 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_al(policy, target_freq, @@ -851,6 +867,14 @@ static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find highest freq at or below target in a table in ascending order */ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, unsigned int target_freq, @@ -904,12 +928,12 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ah(policy, target_freq, @@ -919,6 +943,14 @@ static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, efficiencies); } +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies); +} + /* Find closest freq to target in a table in ascending order */ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, unsigned int target_freq, @@ -989,12 +1021,12 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, return best; } -/* Works only on sorted freq-tables */ -static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, - unsigned int target_freq, - bool efficiencies) +static inline int find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int min, unsigned int max, + bool efficiencies) { - target_freq = clamp_val(target_freq, policy->min, policy->max); + target_freq = clamp_val(target_freq, min, max); if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) return cpufreq_table_find_index_ac(policy, target_freq, @@ -1004,7 +1036,17 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, efficiencies); } -static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq, + bool efficiencies) +{ + return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies); +} + +static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, + unsigned int min, unsigned int max, + int idx) { unsigned int freq; @@ -1013,11 +1055,13 @@ static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx) freq = policy->freq_table[idx].frequency; - return freq == clamp_val(freq, policy->min, policy->max); + return freq == clamp_val(freq, min, max); } static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int min, + unsigned int max, unsigned int relation) { bool efficiencies = policy->efficiencies_available && @@ -1028,29 +1072,26 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, relation &= ~CPUFREQ_RELATION_E; if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) - return cpufreq_table_index_unsorted(policy, target_freq, - relation); + return cpufreq_table_index_unsorted(policy, target_freq, min, + max, relation); retry: switch (relation) { case CPUFREQ_RELATION_L: - idx = cpufreq_table_find_index_l(policy, target_freq, - efficiencies); + idx = find_index_l(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_H: - idx = cpufreq_table_find_index_h(policy, target_freq, - efficiencies); + idx = find_index_h(policy, target_freq, min, max, efficiencies); break; case CPUFREQ_RELATION_C: - idx = cpufreq_table_find_index_c(policy, target_freq, - efficiencies); + idx = find_index_c(policy, target_freq, min, max, efficiencies); break; default: WARN_ON_ONCE(1); return 0; } - /* Limit frequency index to honor policy->min/max */ - if (!cpufreq_is_in_limits(policy, idx) && efficiencies) { + /* Limit frequency index to honor min and max */ + if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) { efficiencies = false; goto retry; } @@ -1150,23 +1191,14 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ return 0; } #else -static inline int cpufreq_boost_trigger_state(int state) -{ - return 0; -} static inline bool cpufreq_boost_enabled(void) { return false; } -static inline int cpufreq_enable_boost_support(void) +static inline int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) { - return -EINVAL; -} - -static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) -{ - return false; + return -EOPNOTSUPP; } static inline int @@ -1184,7 +1216,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_ } #endif -extern unsigned int arch_freq_get_on_cpu(int cpu); +extern int arch_freq_get_on_cpu(int cpu); #ifndef arch_set_freq_scale static __always_inline @@ -1198,7 +1230,6 @@ void arch_set_freq_scale(const struct cpumask *cpus, /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; -extern struct freq_attr *cpufreq_generic_attr[]; int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy); unsigned int cpufreq_generic_get(unsigned int cpu); @@ -1206,6 +1237,8 @@ void cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); +bool cpufreq_ready_for_eas(const struct cpumask *cpu_mask); + static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy) { dev_pm_opp_of_register_em(get_cpu_device(policy->cpu), diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index a04b73c40173..df366ee15456 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -60,7 +60,6 @@ enum cpuhp_state { /* PREPARE section invoked on a control CPU */ CPUHP_OFFLINE = 0, CPUHP_CREATE_THREADS, - CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_AMD_UNCORE_PREP, CPUHP_PERF_POWER, @@ -116,7 +115,6 @@ enum cpuhp_state { CPUHP_NET_IUCV_PREPARE, CPUHP_ARM_BL_PREPARE, CPUHP_TRACE_RB_PREPARE, - CPUHP_MM_ZS_PREPARE, CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, @@ -240,6 +238,7 @@ enum cpuhp_state { CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RANDOM_ONLINE, CPUHP_AP_RCUTREE_ONLINE, + CPUHP_AP_KTHREADS_ONLINE, CPUHP_AP_BASE_CACHEINFO_ONLINE, CPUHP_AP_ONLINE_DYN, CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 40, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9278a50d514f..7ae80a7ca81e 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -81,7 +81,7 @@ static __always_inline void set_nr_cpu_ids(unsigned int nr) * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated - * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online + * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * @@ -179,6 +179,19 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask } /** + * cpumask_first_andnot - return the first cpu from *srcp1 & ~*srcp2 + * @srcp1: the first input + * @srcp2: the second input + * + * Return: >= nr_cpu_ids if no such cpu found. + */ +static __always_inline +unsigned int cpumask_first_andnot(const struct cpumask *srcp1, const struct cpumask *srcp2) +{ + return find_first_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); +} + +/** * cpumask_first_and_and - return the first cpu from *srcp1 & *srcp2 & *srcp3 * @srcp1: the first input * @srcp2: the second input @@ -285,35 +298,71 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, } /** - * for_each_cpu - iterate over every cpu in a mask - * @cpu: the (optionally unsigned) integer iterator - * @mask: the cpumask pointer + * cpumask_next_andnot - get the next cpu in *src1p & ~*src2p + * @n: the cpu prior to the place to search (i.e. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer * - * After the loop, cpu is >= nr_cpu_ids. + * Return: >= nr_cpu_ids if no further cpus set in both. */ -#define for_each_cpu(cpu, mask) \ - for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) - -#if NR_CPUS == 1 static __always_inline -unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +unsigned int cpumask_next_andnot(int n, const struct cpumask *src1p, + const struct cpumask *src2p) { - cpumask_check(start); + /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); + return find_next_andnot_bit(cpumask_bits(src1p), cpumask_bits(src2p), + small_cpumask_bits, n + 1); +} - /* - * Return the first available CPU when wrapping, or when starting before cpu0, - * since there is only one valid option. - */ - if (wrap && n >= 0) - return nr_cpumask_bits; +/** + * cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from + * @n+1. If nothing found, wrap around and start from + * the beginning + * @n: the cpu prior to the place to search (i.e. search starts from @n+1) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src1p & @src2p is empty. + */ +static __always_inline +unsigned int cpumask_next_and_wrap(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_and_bit_wrap(cpumask_bits(src1p), cpumask_bits(src2p), + small_cpumask_bits, n + 1); +} - return cpumask_first(mask); +/** + * cpumask_next_wrap - get the next cpu in *src, starting from @n+1. If nothing + * found, wrap around and start from the beginning + * @n: the cpu prior to the place to search (i.e. search starts from @n+1) + * @src: cpumask pointer + * + * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src is empty. + */ +static __always_inline +unsigned int cpumask_next_wrap(int n, const struct cpumask *src) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit_wrap(cpumask_bits(src), small_cpumask_bits, n + 1); } -#else -unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap); -#endif + +/** + * for_each_cpu - iterate over every cpu in a mask + * @cpu: the (optionally unsigned) integer iterator + * @mask: the cpumask pointer + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu(cpu, mask) \ + for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) /** * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location @@ -391,19 +440,23 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta for_each_set_bit_from(cpu, cpumask_bits(mask), small_cpumask_bits) /** - * cpumask_any_but - return a "random" in a cpumask, but not this one. + * cpumask_any_but - return an arbitrary cpu in a cpumask, but not this one. * @mask: the cpumask to search * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. + * If @cpu == -1, the function is equivalent to cpumask_any(). * Return: >= nr_cpu_ids if no cpus set. */ static __always_inline -unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +unsigned int cpumask_any_but(const struct cpumask *mask, int cpu) { unsigned int i; - cpumask_check(cpu); + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + for_each_cpu(i, mask) if (i != cpu) break; @@ -411,21 +464,25 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } /** - * cpumask_any_and_but - pick a "random" cpu from *mask1 & *mask2, but not this one. + * cpumask_any_and_but - pick an arbitrary cpu from *mask1 & *mask2, but not this one. * @mask1: the first input cpumask * @mask2: the second input cpumask * @cpu: the cpu to ignore * + * If @cpu == -1, the function is equivalent to cpumask_any_and(). * Returns >= nr_cpu_ids if no cpus set. */ static __always_inline unsigned int cpumask_any_and_but(const struct cpumask *mask1, const struct cpumask *mask2, - unsigned int cpu) + int cpu) { unsigned int i; - cpumask_check(cpu); + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + i = cpumask_first_and(mask1, mask2); if (i != cpu) return i; @@ -434,6 +491,33 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1, } /** + * cpumask_any_andnot_but - pick an arbitrary cpu from *mask1 & ~*mask2, but not this one. + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * @cpu: the cpu to ignore + * + * If @cpu == -1, the function returns the first matching cpu. + * Returns >= nr_cpu_ids if no cpus set. + */ +static __always_inline +unsigned int cpumask_any_andnot_but(const struct cpumask *mask1, + const struct cpumask *mask2, + int cpu) +{ + unsigned int i; + + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + + i = cpumask_first_andnot(mask1, mask2); + if (i != cpu) + return i; + + return cpumask_next_andnot(cpu, mask1, mask2); +} + +/** * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer * @cpu: the Nth cpu to find, starting from 0 @@ -542,22 +626,6 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp) } /** - * cpumask_assign_cpu - assign a cpu in a cpumask - * @cpu: cpu number (< nr_cpu_ids) - * @dstp: the cpumask pointer - * @bool: the value to assign - */ -static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) -{ - assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); -} - -static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value) -{ - __assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value); -} - -/** * cpumask_test_cpu - test for a cpu in a cpumask * @cpu: cpu number (< nr_cpu_ids) * @cpumask: the cpumask pointer @@ -840,7 +908,7 @@ void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) } /** - * cpumask_any - pick a "random" cpu from *srcp + * cpumask_any - pick an arbitrary cpu from *srcp * @srcp: the input cpumask * * Return: >= nr_cpu_ids if no cpus set. @@ -848,7 +916,7 @@ void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) #define cpumask_any(srcp) cpumask_first(srcp) /** - * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 + * cpumask_any_and - pick an arbitrary cpu from *mask1 & *mask2 * @mask1: the first input cpumask * @mask2: the second input cpumask * @@ -1033,21 +1101,33 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) #define for_each_online_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) #define for_each_present_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++) + +#define for_each_possible_cpu_wrap(cpu, start) \ + for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++) +#define for_each_online_cpu_wrap(cpu, start) \ + for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++) #else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) #define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) + +#define for_each_possible_cpu_wrap(cpu, start) \ + for_each_cpu_wrap((cpu), cpu_possible_mask, (start)) +#define for_each_online_cpu_wrap(cpu, start) \ + for_each_cpu_wrap((cpu), cpu_online_mask, (start)) #endif /* Wrappers for arch boot code to manipulate normally-constant masks */ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); -void init_cpu_online(const struct cpumask *src); #define assign_cpu(cpu, mask, val) \ assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) +#define __assign_cpu(cpu, mask, val) \ + __assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) + #define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) #define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) #define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 835e7b793f6a..2ddb256187b5 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -82,11 +82,11 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); void cpuset_init_current_mems_allowed(void); int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask); -extern bool cpuset_node_allowed(int node, gfp_t gfp_mask); +extern bool cpuset_current_node_allowed(int node, gfp_t gfp_mask); static inline bool __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { - return cpuset_node_allowed(zone_to_nid(z), gfp_mask); + return cpuset_current_node_allowed(zone_to_nid(z), gfp_mask); } static inline bool cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) @@ -125,9 +125,11 @@ static inline int cpuset_do_page_mem_spread(void) extern bool current_cpuset_is_being_rebound(void); +extern void dl_rebuild_rd_accounting(void); extern void rebuild_sched_domains(void); extern void cpuset_print_current_mems_allowed(void); +extern void cpuset_reset_sched_domains(void); /* * read_mems_allowed_begin is required when making decisions involving @@ -171,6 +173,7 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } +extern bool cpuset_node_allowed(struct cgroup *cgroup, int nid); #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -259,11 +262,20 @@ static inline bool current_cpuset_is_being_rebound(void) return false; } +static inline void dl_rebuild_rd_accounting(void) +{ +} + static inline void rebuild_sched_domains(void) { partition_sched_domains(1, NULL, NULL); } +static inline void cpuset_reset_sched_domains(void) +{ + partition_sched_domains(1, NULL, NULL); +} + static inline void cpuset_print_current_mems_allowed(void) { } @@ -282,6 +294,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } +static inline bool cpuset_node_allowed(struct cgroup *cgroup, int nid) +{ + return true; +} #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 44305336314e..d35726d6a415 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -34,7 +34,12 @@ static inline void arch_kexec_protect_crashkres(void) { } static inline void arch_kexec_unprotect_crashkres(void) { } #endif - +#ifdef CONFIG_CRASH_DM_CRYPT +int crash_load_dm_crypt_keys(struct kimage *image); +ssize_t dm_crypt_keys_read(char *buf, size_t count, u64 *ppos); +#else +static inline int crash_load_dm_crypt_keys(struct kimage *image) {return 0; } +#endif #ifndef arch_crash_handle_hotplug_event static inline void arch_crash_handle_hotplug_event(struct kimage *image, void *arg) { } diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index acc55626afdc..dd6fc3b2133b 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -15,11 +15,15 @@ extern unsigned long long elfcorehdr_addr; extern unsigned long long elfcorehdr_size; +extern unsigned long long dm_crypt_keys_addr; + #ifdef CONFIG_CRASH_DUMP extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size); extern void elfcorehdr_free(unsigned long long addr); extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos); extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); +void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr, + unsigned long long paddr, unsigned long long size); extern int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); @@ -99,6 +103,12 @@ static inline void vmcore_unusable(void) * indicated in the vmcore instead. For example, a ballooned page * contains no data and reading from such a page will cause high * load in the hypervisor. + * @get_device_ram: query RAM ranges that can only be detected by device + * drivers, such as the virtio-mem driver, so they can be included in + * the crash dump on architectures that allocate the elfcore hdr in the dump + * ("2nd") kernel. Indicated RAM ranges may contain holes to reduce the + * total number of ranges; such holes can be detected using the pfn_is_ram + * callback just like for other RAM. * @next: List head to manage registered callbacks internally; initialized by * register_vmcore_cb(). * @@ -109,11 +119,44 @@ static inline void vmcore_unusable(void) */ struct vmcore_cb { bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn); + int (*get_device_ram)(struct vmcore_cb *cb, struct list_head *list); struct list_head next; }; extern void register_vmcore_cb(struct vmcore_cb *cb); extern void unregister_vmcore_cb(struct vmcore_cb *cb); +struct vmcore_range { + struct list_head list; + unsigned long long paddr; + unsigned long long size; + loff_t offset; +}; + +/* Allocate a vmcore range and add it to the list. */ +static inline int vmcore_alloc_add_range(struct list_head *list, + unsigned long long paddr, unsigned long long size) +{ + struct vmcore_range *m = kzalloc(sizeof(*m), GFP_KERNEL); + + if (!m) + return -ENOMEM; + m->paddr = paddr; + m->size = size; + list_add_tail(&m->list, list); + return 0; +} + +/* Free a list of vmcore ranges. */ +static inline void vmcore_free_ranges(struct list_head *list) +{ + struct vmcore_range *m, *tmp; + + list_for_each_entry_safe(m, tmp, list, list) { + list_del(&m->list); + kfree(m); + } +} + #else /* !CONFIG_CRASH_DUMP */ static inline bool is_kdump_kernel(void) { return false; } #endif /* CONFIG_CRASH_DUMP */ diff --git a/include/linux/crash_reserve.h b/include/linux/crash_reserve.h index 5a9df944fb80..1fe7e7d1b214 100644 --- a/include/linux/crash_reserve.h +++ b/include/linux/crash_reserve.h @@ -32,13 +32,12 @@ int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, #define CRASH_ADDR_HIGH_MAX memblock_end_of_DRAM() #endif -void __init reserve_crashkernel_generic(char *cmdline, - unsigned long long crash_size, - unsigned long long crash_base, - unsigned long long crash_low_size, - bool high); +void __init reserve_crashkernel_generic(unsigned long long crash_size, + unsigned long long crash_base, + unsigned long long crash_low_size, + bool high); #else -static inline void __init reserve_crashkernel_generic(char *cmdline, +static inline void __init reserve_crashkernel_generic( unsigned long long crash_size, unsigned long long crash_base, unsigned long long crash_low_size, diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index 6bb0c0bf357b..a559fdff3f7e 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -4,13 +4,19 @@ #include <linux/types.h> -#define CRC_T10DIF_DIGEST_SIZE 2 -#define CRC_T10DIF_BLOCK_SIZE 1 -#define CRC_T10DIF_STRING "crct10dif" +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len); +u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len); -extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, - size_t len); -extern __u16 crc_t10dif(unsigned char const *, size_t); -extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t); +static inline u16 crc_t10dif_update(u16 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC_T10DIF_ARCH)) + return crc_t10dif_arch(crc, p, len); + return crc_t10dif_generic(crc, p, len); +} + +static inline u16 crc_t10dif(const u8 *p, size_t len) +{ + return crc_t10dif_update(0, p, len); +} #endif diff --git a/include/linux/crc16.h b/include/linux/crc16.h index 9fa74529b317..b861d969b161 100644 --- a/include/linux/crc16.h +++ b/include/linux/crc16.h @@ -15,14 +15,7 @@ #include <linux/types.h> -extern u16 const crc16_table[256]; - -extern u16 crc16(u16 crc, const u8 *buffer, size_t len); - -static inline u16 crc16_byte(u16 crc, const u8 data) -{ - return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -} +u16 crc16(u16 crc, const u8 *p, size_t len); #endif /* __CRC16_H */ diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 87f788c0d607..8c1883b81b42 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -1,17 +1,52 @@ -/* - * crc32.h - * See linux/lib/crc32.c for license and changes - */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_CRC32_H #define _LINUX_CRC32_H #include <linux/types.h> #include <linux/bitrev.h> -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len); +u32 crc32_le_base(u32 crc, const u8 *p, size_t len); +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len); +u32 crc32_be_base(u32 crc, const u8 *p, size_t len); +u32 crc32c_arch(u32 crc, const u8 *p, size_t len); +u32 crc32c_base(u32 crc, const u8 *p, size_t len); + +static inline u32 crc32_le(u32 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_le_arch(crc, p, len); + return crc32_le_base(crc, p, len); +} + +static inline u32 crc32_be(u32 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_be_arch(crc, p, len); + return crc32_be_base(crc, p, len); +} + +static inline u32 crc32c(u32 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32c_arch(crc, p, len); + return crc32c_base(crc, p, len); +} + +/* + * crc32_optimizations() returns flags that indicate which CRC32 library + * functions are using architecture-specific optimizations. Unlike + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 + * variants and also whether any needed CPU features are available at runtime. + */ +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ +#define CRC32C_OPTIMIZATION BIT(2) /* crc32c() is optimized */ +#if IS_ENABLED(CONFIG_CRC32_ARCH) +u32 crc32_optimizations(void); +#else +static inline u32 crc32_optimizations(void) { return 0; } +#endif /** * crc32_le_combine - Combine two crc32 check values into one. For two @@ -31,41 +66,13 @@ u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); * with the same initializer as crc1, and crc2 seed was 0. See * also crc32_combine_test(). */ -u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len); +u32 crc32_le_shift(u32 crc, size_t len); static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len); - -/** - * __crc32c_le_combine - Combine two crc32c check values into one. For two - * sequences of bytes, seq1 and seq2 with lengths len1 - * and len2, __crc32c_le() check values were calculated - * for each, crc1 and crc2. - * - * @crc1: crc32c of the first block - * @crc2: crc32c of the second block - * @len2: length of the second block - * - * Return: The __crc32c_le() check value of seq1 and seq2 concatenated, - * requiring only crc1, crc2, and len2. Note: If seq_full denotes - * the concatenated memory area of seq1 with seq2, and crc_full - * the __crc32c_le() value of seq_full, then crc_full == - * __crc32c_le_combine(crc1, crc2, len2) when crc_full was - * seeded with the same initializer as crc1, and crc2 seed - * was 0. See also crc32c_combine_test(). - */ -u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len); - -static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2) -{ - return __crc32c_le_shift(crc1, len2) ^ crc2; -} - #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 357ae4611a45..b8cff2f4309a 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -2,11 +2,6 @@ #ifndef _LINUX_CRC32C_H #define _LINUX_CRC32C_H -#include <linux/types.h> - -extern u32 crc32c(u32 crc, const void *address, unsigned int length); - -/* This macro exists for backwards-compatibility. */ -#define crc32c_le crc32c +#include <linux/crc32.h> #endif /* _LINUX_CRC32C_H */ diff --git a/include/linux/crc64.h b/include/linux/crc64.h index e044c60d1e61..41de30b907df 100644 --- a/include/linux/crc64.h +++ b/include/linux/crc64.h @@ -7,12 +7,40 @@ #include <linux/types.h> -#define CRC64_ROCKSOFT_STRING "crc64-rocksoft" +u64 crc64_be_arch(u64 crc, const u8 *p, size_t len); +u64 crc64_be_generic(u64 crc, const u8 *p, size_t len); +u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len); +u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len); -u64 __pure crc64_be(u64 crc, const void *p, size_t len); -u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len); +/** + * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64 + * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation, + * or the previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + */ +static inline u64 crc64_be(u64 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC64_ARCH)) + return crc64_be_arch(crc, p, len); + return crc64_be_generic(crc, p, len); +} -u64 crc64_rocksoft(const unsigned char *buffer, size_t len); -u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len); +/** + * crc64_nvme - Calculate CRC64-NVME + * @crc: seed value for computation. 0 for a new CRC calculation, or the + * previous crc64 value if computing incrementally. + * @p: pointer to buffer over which CRC64 is run + * @len: length of buffer @p + * + * This computes the CRC64 defined in the NVME NVM Command Set Specification, + * *including the bitwise inversion at the beginning and end*. + */ +static inline u64 crc64_nvme(u64 crc, const void *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC64_ARCH)) + return ~crc64_nvme_arch(~crc, p, len); + return ~crc64_nvme_generic(~crc, p, len); +} #endif /* _LINUX_CRC64_H */ diff --git a/include/linux/crc7.h b/include/linux/crc7.h index b462842f3c32..61d34749e437 100644 --- a/include/linux/crc7.h +++ b/include/linux/crc7.h @@ -3,13 +3,6 @@ #define _LINUX_CRC7_H #include <linux/types.h> -extern const u8 crc7_be_syndrome_table[256]; - -static inline u8 crc7_be_byte(u8 crc, u8 data) -{ - return crc7_be_syndrome_table[crc ^ data]; -} - extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len); #endif diff --git a/include/linux/cred.h b/include/linux/cred.h index e4a3155fe409..5658a3bfe803 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -155,8 +155,6 @@ extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *); -extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); @@ -172,48 +170,14 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } -/* - * Override creds without bumping reference count. Caller must ensure - * reference remains valid or has taken reference. Almost always not the - * interface you want. Use override_creds()/revert_creds() instead. - */ -static inline const struct cred *override_creds_light(const struct cred *override_cred) -{ - const struct cred *old = current->cred; - - rcu_assign_pointer(current->cred, override_cred); - return old; -} - -static inline void revert_creds_light(const struct cred *revert_cred) -{ - rcu_assign_pointer(current->cred, revert_cred); -} - -/** - * get_new_cred_many - Get references on a new set of credentials - * @cred: The new credentials to reference - * @nr: Number of references to acquire - * - * Get references on the specified set of new credentials. The caller must - * release all acquired references. - */ -static inline struct cred *get_new_cred_many(struct cred *cred, int nr) +static inline const struct cred *override_creds(const struct cred *override_cred) { - atomic_long_add(nr, &cred->usage); - return cred; + return rcu_replace_pointer(current->cred, override_cred, 1); } -/** - * get_new_cred - Get a reference on a new set of credentials - * @cred: The new credentials to reference - * - * Get a reference on the specified set of new credentials. The caller must - * release the reference. - */ -static inline struct cred *get_new_cred(struct cred *cred) +static inline const struct cred *revert_creds(const struct cred *revert_cred) { - return get_new_cred_many(cred, 1); + return rcu_replace_pointer(current->cred, revert_cred, 1); } /** @@ -236,7 +200,8 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) if (!cred) return cred; nonconst_cred->non_rcu = 0; - return get_new_cred_many(nonconst_cred, nr); + atomic_long_add(nr, &nonconst_cred->usage); + return cred; } /* diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b164da5e129e..a2137e19be7d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -13,7 +13,8 @@ #define _LINUX_CRYPTO_H #include <linux/completion.h> -#include <linux/refcount.h> +#include <linux/errno.h> +#include <linux/refcount_types.h> #include <linux/slab.h> #include <linux/types.h> @@ -22,7 +23,6 @@ */ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 -#define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 #define CRYPTO_ALG_TYPE_AEAD 0x00000003 #define CRYPTO_ALG_TYPE_LSKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 @@ -50,6 +50,15 @@ #define CRYPTO_ALG_NEED_FALLBACK 0x00000100 /* + * Set if the algorithm data structure should be duplicated into + * kmalloc memory before registration. This is useful for hardware + * that can be disconnected at will. Do not use this if the data + * structure is embedded into a bigger one. Duplicate the overall + * data structure in the driver in that case. + */ +#define CRYPTO_ALG_DUP_FIRST 0x00000200 + +/* * Set if the algorithm has passed automated run-time testing. Note that * if there is no run-time testing for a given algorithm it is considered * to have passed. @@ -124,6 +133,14 @@ */ #define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 +/* Set if the algorithm supports virtual addresses. */ +#define CRYPTO_ALG_REQ_VIRT 0x00040000 + +/* Set if the algorithm cannot have a fallback (e.g., phmac). */ +#define CRYPTO_ALG_NO_FALLBACK 0x00080000 + +/* The high bits 0xff000000 are reserved for type-specific flags. */ + /* * Transform masks and values (for crt_flags). */ @@ -133,6 +150,7 @@ #define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS 0x00000100 #define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 +#define CRYPTO_TFM_REQ_ON_STACK 0x00000800 /* * Miscellaneous stuff. @@ -239,26 +257,7 @@ struct cipher_alg { void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); }; -/** - * struct compress_alg - compression/decompression algorithm - * @coa_compress: Compress a buffer of specified length, storing the resulting - * data in the specified buffer. Return the length of the - * compressed data in dlen. - * @coa_decompress: Decompress the source buffer, storing the uncompressed - * data in the specified buffer. The length of the data is - * returned in dlen. - * - * All fields are mandatory. - */ -struct compress_alg { - int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen); - int (*coa_decompress)(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen); -}; - #define cra_cipher cra_u.cipher -#define cra_compress cra_u.compress /** * struct crypto_alg - definition of a cryptograpic cipher algorithm @@ -291,6 +290,7 @@ struct compress_alg { * to the alignmask of the algorithm being used, in order to * avoid the API having to realign them. Note: the alignmask is * not supported for hash algorithms and is always 0 for them. + * @cra_reqsize: Size of the request context for this algorithm. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest @@ -309,27 +309,16 @@ struct compress_alg { * transformation types. There are multiple options, such as * &crypto_skcipher_type, &crypto_ahash_type, &crypto_rng_type. * This field might be empty. In that case, there are no common - * callbacks. This is the case for: cipher, compress, shash. + * callbacks. This is the case for: cipher. * @cra_u: Callbacks implementing the transformation. This is a union of * multiple structures. Depending on the type of transformation selected * by @cra_type and @cra_flags above, the associated structure must be * filled with callbacks. This field might be empty. This is the case * for ahash, shash. - * @cra_init: Initialize the cryptographic transformation object. This function - * is used to initialize the cryptographic transformation object. - * This function is called only once at the instantiation time, right - * after the transformation context was allocated. In case the - * cryptographic hardware has some special requirements which need to - * be handled by software, this function shall check for the precise - * requirement of the transformation and put any software fallbacks - * in place. - * @cra_exit: Deinitialize the cryptographic transformation object. This is a - * counterpart to @cra_init, used to remove various changes set in - * @cra_init. + * @cra_init: Deprecated, do not use. + * @cra_exit: Deprecated, do not use. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. - * @cra_u.compress: Union member which contains a (de)compression algorithm. - * See @struct @compress_alg. * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE * @cra_list: internally used * @cra_users: internally used @@ -348,6 +337,7 @@ struct crypto_alg { unsigned int cra_blocksize; unsigned int cra_ctxsize; unsigned int cra_alignmask; + unsigned int cra_reqsize; int cra_priority; refcount_t cra_refcnt; @@ -359,7 +349,6 @@ struct crypto_alg { union { struct cipher_alg cipher; - struct compress_alg compress; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); @@ -425,18 +414,16 @@ struct crypto_tfm { u32 crt_flags; int node; - + + struct crypto_tfm *fb; + void (*exit)(struct crypto_tfm *tfm); - + struct crypto_alg *__crt_alg; void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; }; -struct crypto_comp { - struct crypto_tfm base; -}; - /* * Transform user interface. */ @@ -472,6 +459,11 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_alignmask; } +static inline unsigned int crypto_tfm_alg_reqsize(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_reqsize; +} + static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) { return tfm->crt_flags; @@ -493,52 +485,45 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) return __alignof__(tfm->__crt_ctx); } -static inline struct crypto_comp *__crypto_comp_cast(struct crypto_tfm *tfm) +static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) { - return (struct crypto_comp *)tfm; + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; } -static inline struct crypto_comp *crypto_alloc_comp(const char *alg_name, - u32 type, u32 mask) +static inline bool crypto_req_on_stack(struct crypto_async_request *req) { - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_COMPRESS; - mask |= CRYPTO_ALG_TYPE_MASK; - - return __crypto_comp_cast(crypto_alloc_base(alg_name, type, mask)); + return req->flags & CRYPTO_TFM_REQ_ON_STACK; } -static inline struct crypto_tfm *crypto_comp_tfm(struct crypto_comp *tfm) +static inline void crypto_request_set_callback( + struct crypto_async_request *req, u32 flags, + crypto_completion_t compl, void *data) { - return &tfm->base; -} + u32 keep = CRYPTO_TFM_REQ_ON_STACK; -static inline void crypto_free_comp(struct crypto_comp *tfm) -{ - crypto_free_tfm(crypto_comp_tfm(tfm)); + req->complete = compl; + req->data = data; + req->flags &= keep; + req->flags |= flags & ~keep; } -static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) +static inline void crypto_request_set_tfm(struct crypto_async_request *req, + struct crypto_tfm *tfm) { - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_COMPRESS; - mask |= CRYPTO_ALG_TYPE_MASK; - - return crypto_has_alg(alg_name, type, mask); + req->tfm = tfm; + req->flags &= ~CRYPTO_TFM_REQ_ON_STACK; } -static inline const char *crypto_comp_name(struct crypto_comp *tfm) +struct crypto_async_request *crypto_request_clone( + struct crypto_async_request *req, size_t total, gfp_t gfp); + +static inline void crypto_stack_request_init(struct crypto_async_request *req, + struct crypto_tfm *tfm) { - return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); + req->flags = 0; + crypto_request_set_tfm(req, tfm); + req->flags |= CRYPTO_TFM_REQ_ON_STACK; } -int crypto_comp_compress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); - -int crypto_comp_decompress(struct crypto_comp *tfm, - const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen); - #endif /* _LINUX_CRYPTO_H */ diff --git a/include/linux/damon.h b/include/linux/damon.h index a67f2c4940e9..a4011726cb3b 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -36,6 +36,16 @@ struct damon_addr_range { }; /** + * struct damon_size_range - Represents size for filter to operate on [@min, @max]. + * @min: Min size (inclusive). + * @max: Max size (inclusive). + */ +struct damon_size_range { + unsigned long min; + unsigned long max; +}; + +/** * struct damon_region - Represents a monitoring target region. * @ar: The address range of the region. * @sampling_addr: Address of the sample for the next access check. @@ -135,6 +145,8 @@ enum damos_action { * * @DAMOS_QUOTA_USER_INPUT: User-input value. * @DAMOS_QUOTA_SOME_MEM_PSI_US: System level some memory PSI in us. + * @DAMOS_QUOTA_NODE_MEM_USED_BP: MemUsed ratio of a node. + * @DAMOS_QUOTA_NODE_MEM_FREE_BP: MemFree ratio of a node. * @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics. * * Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported. @@ -142,6 +154,8 @@ enum damos_action { enum damos_quota_goal_metric { DAMOS_QUOTA_USER_INPUT, DAMOS_QUOTA_SOME_MEM_PSI_US, + DAMOS_QUOTA_NODE_MEM_USED_BP, + DAMOS_QUOTA_NODE_MEM_FREE_BP, NR_DAMOS_QUOTA_GOAL_METRICS, }; @@ -151,6 +165,7 @@ enum damos_quota_goal_metric { * @target_value: Target value of @metric to achieve with the tuning. * @current_value: Current value of @metric. * @last_psi_total: Last measured total PSI + * @nid: Node id. * @list: List head for siblings. * * Data structure for getting the current score of the quota tuning goal. The @@ -169,6 +184,7 @@ struct damos_quota_goal { /* metric-dependent fields */ union { u64 last_psi_total; + int nid; }; struct list_head list; }; @@ -193,11 +209,16 @@ struct damos_quota_goal { * size quota is set, DAMON tries to apply the action only up to &sz bytes * within &reset_interval. * - * Internally, the time quota is transformed to a size quota using estimated - * throughput of the scheme's action. DAMON then compares it against &sz and - * uses smaller one as the effective quota. + * To convince the different types of quotas and goals, DAMON internally + * converts those into one single size quota called "effective quota". DAMON + * internally uses it as the only one real quota. The conversion is made as + * follows. + * + * The time quota is transformed to a size quota using estimated throughput of + * the scheme's action. DAMON then compares it against &sz and uses smaller + * one as the effective quota. * - * If @goals is not empt, DAMON calculates yet another size quota based on the + * If @goals is not empty, DAMON calculates yet another size quota based on the * goals using its internal feedback loop algorithm, for every @reset_interval. * Then, if the new size quota is smaller than the effective quota, it uses the * new size quota as the effective quota. @@ -286,21 +307,44 @@ struct damos_watermarks { * @sz_tried: Total size of regions that the scheme is tried to be applied. * @nr_applied: Total number of regions that the scheme is applied. * @sz_applied: Total size of regions that the scheme is applied. + * @sz_ops_filter_passed: + * Total bytes that passed ops layer-handled DAMOS filters. * @qt_exceeds: Total number of times the quota of the scheme has exceeded. + * + * "Tried an action to a region" in this context means the DAMOS core logic + * determined the region as eligible to apply the action. The access pattern + * (&struct damos_access_pattern), quotas (&struct damos_quota), watermarks + * (&struct damos_watermarks) and filters (&struct damos_filter) that handled + * on core logic can affect this. The core logic asks the operation set + * (&struct damon_operations) to apply the action to the region. + * + * "Applied an action to a region" in this context means the operation set + * (&struct damon_operations) successfully applied the action to the region, at + * least to a part of the region. The filters (&struct damos_filter) that + * handled on operation set layer and type of the action and pages of the + * region can affect this. For example, if a filter is set to exclude + * anonymous pages and the region has only anonymous pages, the region will be + * failed at applying the action. If the action is &DAMOS_PAGEOUT and all + * pages of the region are already paged out, the region will be failed at + * applying the action. */ struct damos_stat { unsigned long nr_tried; unsigned long sz_tried; unsigned long nr_applied; unsigned long sz_applied; + unsigned long sz_ops_filter_passed; unsigned long qt_exceeds; }; /** * enum damos_filter_type - Type of memory for &struct damos_filter * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. + * @DAMOS_FILTER_TYPE_ACTIVE: Active pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. * @DAMOS_FILTER_TYPE_YOUNG: Recently accessed pages. + * @DAMOS_FILTER_TYPE_HUGEPAGE_SIZE: Page is part of a hugepage. + * @DAMOS_FILTER_TYPE_UNMAPPED: Unmapped pages. * @DAMOS_FILTER_TYPE_ADDR: Address range. * @DAMOS_FILTER_TYPE_TARGET: Data Access Monitoring target. * @NR_DAMOS_FILTER_TYPES: Number of filter types. @@ -318,8 +362,11 @@ struct damos_stat { */ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, + DAMOS_FILTER_TYPE_ACTIVE, DAMOS_FILTER_TYPE_MEMCG, DAMOS_FILTER_TYPE_YOUNG, + DAMOS_FILTER_TYPE_HUGEPAGE_SIZE, + DAMOS_FILTER_TYPE_UNMAPPED, DAMOS_FILTER_TYPE_ADDR, DAMOS_FILTER_TYPE_TARGET, NR_DAMOS_FILTER_TYPES, @@ -327,31 +374,61 @@ enum damos_filter_type { /** * struct damos_filter - DAMOS action target memory filter. - * @type: Type of the page. - * @matching: If the matching page should filtered out or in. + * @type: Type of the target memory. + * @matching: Whether this is for @type-matching memory. + * @allow: Whether to include or exclude the @matching memory. * @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG. * @addr_range: Address range if @type is DAMOS_FILTER_TYPE_ADDR. * @target_idx: Index of the &struct damon_target of * &damon_ctx->adaptive_targets if @type is * DAMOS_FILTER_TYPE_TARGET. + * @sz_range: Size range if @type is DAMOS_FILTER_TYPE_HUGEPAGE_SIZE. * @list: List head for siblings. * * Before applying the &damos->action to a memory region, DAMOS checks if each - * page of the region matches to this and avoid applying the action if so. - * Support of each filter type depends on the running &struct damon_operations - * and the type. Refer to &enum damos_filter_type for more detai. + * byte of the region matches to this given condition and avoid applying the + * action if so. Support of each filter type depends on the running &struct + * damon_operations and the type. Refer to &enum damos_filter_type for more + * details. */ struct damos_filter { enum damos_filter_type type; bool matching; + bool allow; union { unsigned short memcg_id; struct damon_addr_range addr_range; int target_idx; + struct damon_size_range sz_range; }; struct list_head list; }; +struct damon_ctx; +struct damos; + +/** + * struct damos_walk_control - Control damos_walk(). + * + * @walk_fn: Function to be called back for each region. + * @data: Data that will be passed to walk functions. + * + * Control damos_walk(), which requests specific kdamond to invoke the given + * function to each region that eligible to apply actions of the kdamond's + * schemes. Refer to damos_walk() for more details. + */ +struct damos_walk_control { + void (*walk_fn)(void *data, struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *s, unsigned long sz_filter_passed); + void *data; +/* private: internal use only */ + /* informs if the kdamond finished handling of the walk request */ + struct completion completion; + /* informs if the walk is canceled. */ + bool canceled; +}; + /** * struct damos_access_pattern - Target access pattern of the given scheme. * @min_sz_region: Minimum size of target regions. @@ -379,6 +456,8 @@ struct damos_access_pattern { * @wmarks: Watermarks for automated (in)activation of this scheme. * @target_nid: Destination node if @action is "migrate_{hot,cold}". * @filters: Additional set of &struct damos_filter for &action. + * @ops_filters: ops layer handling &struct damos_filter objects list. + * @last_applied: Last @action applied ops-managing entity. * @stat: Statistics of this scheme. * @list: List head for siblings. * @@ -401,6 +480,15 @@ struct damos_access_pattern { * implementation could check pages of the region and skip &action to respect * &filters * + * The minimum entity that @action can be applied depends on the underlying + * &struct damon_operations. Since it may not be aligned with the core layer + * abstract, namely &struct damon_region, &struct damon_operations could apply + * @action to same entity multiple times. Large folios that underlying on + * multiple &struct damon region objects could be such examples. The &struct + * damon_operations can use @last_applied to avoid that. DAMOS core logic + * unsets @last_applied when each regions walking for applying the scheme is + * finished. + * * After applying the &action to each region, &stat_count and &stat_sz is * updated to reflect the number of regions and total size of regions that the * &action is applied. @@ -415,6 +503,16 @@ struct damos { * @action */ unsigned long next_apply_sis; + /* informs if ongoing DAMOS walk for this scheme is finished */ + bool walk_completed; + /* + * If the current region in the filtering stage is allowed by core + * layer-handled filters. If true, operations layer allows it, too. + */ + bool core_filters_allowed; + /* whether to reject core/ops filters umatched regions */ + bool core_filters_default_reject; + bool ops_filters_default_reject; /* public: */ struct damos_quota quota; struct damos_watermarks wmarks; @@ -422,6 +520,8 @@ struct damos { int target_nid; }; struct list_head filters; + struct list_head ops_filters; + void *last_applied; struct damos_stat stat; struct list_head list; }; @@ -442,8 +542,6 @@ enum damon_ops_id { NR_DAMON_OPS, }; -struct damon_ctx; - /** * struct damon_operations - Monitoring operations for given use cases. * @@ -452,7 +550,6 @@ struct damon_ctx; * @update: Update operations-related data structures. * @prepare_access_checks: Prepare next access check of target regions. * @check_accesses: Check the accesses to target regions. - * @reset_aggregated: Reset aggregated accesses monitoring results. * @get_scheme_score: Get the score of a region for a scheme. * @apply_scheme: Apply a DAMON-based operation scheme. * @target_valid: Determine if the target is valid. @@ -464,8 +561,7 @@ struct damon_ctx; * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting * the monitoring, @update after each &damon_attrs.ops_update_interval, and * @check_accesses, @target_valid and @prepare_access_checks after each - * &damon_attrs.sample_interval. Finally, @reset_aggregated is called after - * each &damon_attrs.aggr_interval. + * &damon_attrs.sample_interval. * * Each &struct damon_operations instance having valid @id can be registered * via damon_register_ops() and selected by damon_select_ops() later. @@ -480,14 +576,13 @@ struct damon_ctx; * last preparation and update the number of observed accesses of each region. * It should also return max number of observed accesses that made as a result * of its update. The value will be used for regions adjustment threshold. - * @reset_aggregated should reset the access monitoring results that aggregated - * by @check_accesses. * @get_scheme_score should return the priority score of a region for a scheme * as an integer in [0, &DAMOS_MAX_SCORE]. * @apply_scheme is called from @kdamond when a region for user provided * DAMON-based operation scheme is found. It should apply the scheme's action * to the region and return bytes of the region that the action is successfully - * applied. + * applied. It should also report how many bytes of the region has passed + * filters (&struct damos_filter) that handled by itself. * @target_valid should check whether the target is still valid for the * monitoring. * @cleanup is called from @kdamond just before its termination. @@ -498,13 +593,12 @@ struct damon_operations { void (*update)(struct damon_ctx *context); void (*prepare_access_checks)(struct damon_ctx *context); unsigned int (*check_accesses)(struct damon_ctx *context); - void (*reset_aggregated)(struct damon_ctx *context); int (*get_scheme_score)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, struct damos *scheme); unsigned long (*apply_scheme)(struct damon_ctx *context, struct damon_target *t, struct damon_region *r, - struct damos *scheme); + struct damos *scheme, unsigned long *sz_filter_passed); bool (*target_valid)(struct damon_target *t); void (*cleanup)(struct damon_ctx *context); }; @@ -512,52 +606,84 @@ struct damon_operations { /** * struct damon_callback - Monitoring events notification callbacks. * - * @before_start: Called before starting the monitoring. * @after_wmarks_check: Called after each schemes' watermarks check. - * @after_sampling: Called after each sampling. * @after_aggregation: Called after each aggregation. - * @before_damos_apply: Called before applying DAMOS action. * @before_terminate: Called before terminating the monitoring. - * @private: User private data. * - * The monitoring thread (&damon_ctx.kdamond) calls @before_start and - * @before_terminate just before starting and finishing the monitoring, - * respectively. Therefore, those are good places for installing and cleaning - * @private. + * The monitoring thread (&damon_ctx.kdamond) calls @before_terminate just + * before finishing the monitoring. * * The monitoring thread calls @after_wmarks_check after each DAMON-based * operation schemes' watermarks check. If users need to make changes to the * attributes of the monitoring context while it's deactivated due to the * watermarks, this is the good place to do. * - * The monitoring thread calls @after_sampling and @after_aggregation for each - * of the sampling intervals and aggregation intervals, respectively. - * Therefore, users can safely access the monitoring results without additional - * protection. For the reason, users are recommended to use these callback for - * the accesses to the results. + * The monitoring thread calls @after_aggregation for each of the aggregation + * intervals. Therefore, users can safely access the monitoring results + * without additional protection. For the reason, users are recommended to use + * these callback for the accesses to the results. * * If any callback returns non-zero, monitoring stops. */ struct damon_callback { - void *private; - - int (*before_start)(struct damon_ctx *context); int (*after_wmarks_check)(struct damon_ctx *context); - int (*after_sampling)(struct damon_ctx *context); int (*after_aggregation)(struct damon_ctx *context); - int (*before_damos_apply)(struct damon_ctx *context, - struct damon_target *target, - struct damon_region *region, - struct damos *scheme); void (*before_terminate)(struct damon_ctx *context); }; +/* + * struct damon_call_control - Control damon_call(). + * + * @fn: Function to be called back. + * @data: Data that will be passed to @fn. + * @return_code: Return code from @fn invocation. + * + * Control damon_call(), which requests specific kdamond to invoke a given + * function. Refer to damon_call() for more details. + */ +struct damon_call_control { + int (*fn)(void *data); + void *data; + int return_code; +/* private: internal use only */ + /* informs if the kdamond finished handling of the request */ + struct completion completion; + /* informs if the kdamond canceled @fn infocation */ + bool canceled; +}; + +/** + * struct damon_intervals_goal - Monitoring intervals auto-tuning goal. + * + * @access_bp: Access events observation ratio to achieve in bp. + * @aggrs: Number of aggregations to acheive @access_bp within. + * @min_sample_us: Minimum resulting sampling interval in microseconds. + * @max_sample_us: Maximum resulting sampling interval in microseconds. + * + * DAMON automatically tunes &damon_attrs->sample_interval and + * &damon_attrs->aggr_interval aiming the ratio in bp (1/10,000) of + * DAMON-observed access events to theoretical maximum amount within @aggrs + * aggregations be same to @access_bp. The logic increases + * &damon_attrs->aggr_interval and &damon_attrs->sampling_interval in same + * ratio if the current access events observation ratio is lower than the + * target for each @aggrs aggregations, and vice versa. + * + * If @aggrs is zero, the tuning is disabled and hence this struct is ignored. + */ +struct damon_intervals_goal { + unsigned long access_bp; + unsigned long aggrs; + unsigned long min_sample_us; + unsigned long max_sample_us; +}; + /** * struct damon_attrs - Monitoring attributes for accuracy/overhead control. * * @sample_interval: The time between access samplings. * @aggr_interval: The time between monitor results aggregations. * @ops_update_interval: The time between monitoring operations updates. + * @intervals_goal: Intervals auto-tuning goal. * @min_nr_regions: The minimum number of adaptive monitoring * regions. * @max_nr_regions: The maximum number of adaptive monitoring @@ -577,8 +703,20 @@ struct damon_attrs { unsigned long sample_interval; unsigned long aggr_interval; unsigned long ops_update_interval; + struct damon_intervals_goal intervals_goal; unsigned long min_nr_regions; unsigned long max_nr_regions; +/* private: internal use only */ + /* + * @aggr_interval to @sample_interval ratio. + * Core-external components call damon_set_attrs() with &damon_attrs + * that this field is unset. In the case, damon_set_attrs() sets this + * field of resulting &damon_attrs. Core-internal components such as + * kdamond_tune_intervals() calls damon_set_attrs() with &damon_attrs + * that this field is set. In the case, damon_set_attrs() just keep + * it. + */ + unsigned long aggr_samples; }; /** @@ -627,11 +765,22 @@ struct damon_ctx { * update */ unsigned long next_ops_update_sis; + /* + * number of sample intervals that should be passed before next + * intervals tuning + */ + unsigned long next_intervals_tune_sis; /* for waiting until the execution of the kdamond_fn is started */ struct completion kdamond_started; /* for scheme quotas prioritization */ unsigned long *regions_score_histogram; + struct damon_call_control *call_control; + struct mutex call_control_lock; + + struct damos_walk_control *walk_control; + struct mutex walk_control_lock; + /* public: */ struct task_struct *kdamond; struct mutex kdamond_lock; @@ -702,6 +851,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damos_for_each_filter_safe(f, next, scheme) \ list_for_each_entry_safe(f, next, &(scheme)->filters, list) +#define damos_for_each_ops_filter(f, scheme) \ + list_for_each_entry(f, &(scheme)->ops_filters, list) + +#define damos_for_each_ops_filter_safe(f, next, scheme) \ + list_for_each_entry_safe(f, next, &(scheme)->ops_filters, list) + #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); @@ -725,8 +880,9 @@ void damon_update_region_access_rate(struct damon_region *r, bool accessed, struct damon_attrs *attrs); struct damos_filter *damos_new_filter(enum damos_filter_type type, - bool matching); + bool matching, bool allow); void damos_add_filter(struct damos *s, struct damos_filter *f); +bool damos_filter_for_ops(enum damos_filter_type type); void damos_destroy_filter(struct damos_filter *f); struct damos_quota_goal *damos_new_quota_goal( @@ -779,6 +935,9 @@ static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive); int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); +int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); +int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); + int damon_set_region_biggest_system_ram_default(struct damon_target *t, unsigned long *start, unsigned long *end); diff --git a/include/linux/dax.h b/include/linux/dax.h index df41a0017b31..dcc9fcdf14e4 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -207,6 +207,11 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops); +static inline bool dax_page_is_idle(struct page *page) +{ + return page && page_ref_count(page) == 0; +} + #if IS_ENABLED(CONFIG_DAX) int dax_read_lock(void); void dax_read_unlock(int id); @@ -220,6 +225,19 @@ static inline void dax_read_unlock(int id) { } #endif /* CONFIG_DAX */ + +#if !IS_ENABLED(CONFIG_FS_DAX) +static inline int __must_check dax_break_layout(struct inode *inode, + loff_t start, loff_t end, void (cb)(struct inode *)) +{ + return 0; +} + +static inline void dax_break_layout_final(struct inode *inode) +{ +} +#endif + bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, @@ -241,8 +259,18 @@ vm_fault_t dax_iomap_fault(struct vm_fault *vmf, unsigned int order, vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order, pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); +void dax_delete_mapping_range(struct address_space *mapping, + loff_t start, loff_t end); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); +int __must_check dax_break_layout(struct inode *inode, loff_t start, + loff_t end, void (cb)(struct inode *)); +static inline int __must_check dax_break_layout_inode(struct inode *inode, + void (cb)(struct inode *)) +{ + return dax_break_layout(inode, 0, LLONG_MAX, cb); +} +void dax_break_layout_final(struct inode *inode); int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff, struct inode *dest, loff_t destoff, loff_t len, bool *is_same, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bff956f7b2b9..e29823c701ac 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -57,6 +57,8 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } +#define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l) +#define QSTR(n) QSTR_LEN(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; @@ -68,16 +70,24 @@ extern const struct qstr dotdot_name; * large memory footprint increase). */ #ifdef CONFIG_64BIT -# define DNAME_INLINE_LEN 40 /* 192 bytes */ +# define DNAME_INLINE_WORDS 5 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# define DNAME_INLINE_WORDS 9 /* 128 bytes */ # else -# define DNAME_INLINE_LEN 44 /* 128 bytes */ +# define DNAME_INLINE_WORDS 11 /* 128 bytes */ # endif #endif +#define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long)) + +union shortname_store { + unsigned char string[DNAME_INLINE_LEN]; + unsigned long words[DNAME_INLINE_WORDS]; +}; + #define d_lock d_lockref.lock +#define d_iname d_shortname.string struct dentry { /* RCU lookup touched fields */ @@ -88,7 +98,7 @@ struct dentry { struct qstr d_name; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ - unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + union shortname_store d_shortname; /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ @@ -136,7 +146,8 @@ enum d_real_type { }; struct dentry_operations { - int (*d_revalidate)(struct dentry *, unsigned int); + int (*d_revalidate)(struct inode *, const struct qstr *, + struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, struct qstr *); int (*d_compare)(const struct dentry *, @@ -150,6 +161,8 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(const struct path *, bool); struct dentry *(*d_real)(struct dentry *, enum d_real_type type); + bool (*d_unalias_trylock)(const struct dentry *); + void (*d_unalias_unlock)(const struct dentry *); } ____cacheline_aligned; /* @@ -161,65 +174,59 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH BIT(0) -#define DCACHE_OP_COMPARE BIT(1) -#define DCACHE_OP_REVALIDATE BIT(2) -#define DCACHE_OP_DELETE BIT(3) -#define DCACHE_OP_PRUNE BIT(4) - -#define DCACHE_DISCONNECTED BIT(5) - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ - -#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ - -#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ - -#define DCACHE_CANT_MOUNT BIT(8) -#define DCACHE_GENOCIDE BIT(9) -#define DCACHE_SHRINK_LIST BIT(10) - -#define DCACHE_OP_WEAK_REVALIDATE BIT(11) - -#define DCACHE_NFSFS_RENAMED BIT(12) - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(14) - /* Parent inode is watched by some fsnotify listener */ - -#define DCACHE_DENTRY_KILLED BIT(15) - -#define DCACHE_MOUNTED BIT(16) /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT BIT(17) /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT BIT(18) /* manage transit from this dirent */ +enum dentry_flags { + DCACHE_OP_HASH = BIT(0), + DCACHE_OP_COMPARE = BIT(1), + DCACHE_OP_REVALIDATE = BIT(2), + DCACHE_OP_DELETE = BIT(3), + DCACHE_OP_PRUNE = BIT(4), + /* + * This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it + * finds a directory inode with a DCACHE_DISCONNECTED dentry, will + * d_move that dentry into place and return that dentry rather than the + * passed one, typically using d_splice_alias. + */ + DCACHE_DISCONNECTED = BIT(5), + DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ + DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ + DCACHE_CANT_MOUNT = BIT(8), + DCACHE_GENOCIDE = BIT(9), + DCACHE_SHRINK_LIST = BIT(10), + DCACHE_OP_WEAK_REVALIDATE = BIT(11), + /* + * this dentry has been "silly renamed" and has to be deleted on the + * last dput() + */ + DCACHE_NFSFS_RENAMED = BIT(12), + DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ + DCACHE_DENTRY_KILLED = BIT(14), + DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ + DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ + DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ + DCACHE_LRU_LIST = BIT(18), + DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ + DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ + DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ + DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ + DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ + DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ + DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ + DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ + DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ + DCACHE_OP_REAL = BIT(23), + DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ + DCACHE_DENTRY_CURSOR = BIT(25), + DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ +}; + #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST BIT(19) - -#define DCACHE_ENTRY_TYPE (7 << 20) /* bits 20..22 are for storing type: */ -#define DCACHE_MISS_TYPE (0 << 20) /* Negative dentry */ -#define DCACHE_WHITEOUT_TYPE (1 << 20) /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE (2 << 20) /* Normal directory */ -#define DCACHE_AUTODIR_TYPE (3 << 20) /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE (4 << 20) /* Regular file type */ -#define DCACHE_SPECIAL_TYPE (5 << 20) /* Other file type */ -#define DCACHE_SYMLINK_TYPE (6 << 20) /* Symlink */ - -#define DCACHE_NOKEY_NAME BIT(25) /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL BIT(26) - -#define DCACHE_PAR_LOOKUP BIT(28) /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR BIT(29) -#define DCACHE_NORCU BIT(30) /* No RCU delay for freeing */ - extern seqlock_t rename_lock; /* @@ -241,7 +248,6 @@ extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern bool d_same_name(const struct dentry *dentry, const struct dentry *parent, const struct qstr *name); -extern struct dentry * d_exact_alias(struct dentry *, struct inode *); extern struct dentry *d_find_any_alias(struct inode *inode); extern struct dentry * d_obtain_alias(struct inode *); extern struct dentry * d_obtain_root(struct inode *); @@ -276,7 +282,6 @@ extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); -extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); static inline unsigned d_count(const struct dentry *dentry) { @@ -508,12 +513,7 @@ static inline int simple_positive(const struct dentry *dentry) return d_really_is_positive(dentry) && !d_unhashed(dentry); } -extern int sysctl_vfs_cache_pressure; - -static inline unsigned long vfs_pressure_ratio(unsigned long val) -{ - return mult_frac(val, sysctl_vfs_cache_pressure, 100); -} +unsigned long vfs_pressure_ratio(unsigned long val); /** * d_inode - Get the actual inode of this dentry @@ -589,7 +589,7 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) struct name_snapshot { struct qstr name; - unsigned char inline_name[DNAME_INLINE_LEN]; + union shortname_store inline_name; }; void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); void release_dentry_name_snapshot(struct name_snapshot *); diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 325af611909f..0b61b8b996d4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -2,79 +2,8 @@ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H - -#include <linux/in.h> -#include <linux/interrupt.h> -#include <linux/ktime.h> -#include <linux/list.h> -#include <linux/uio.h> -#include <linux/workqueue.h> - -#include <net/inet_connection_sock.h> -#include <net/inet_sock.h> -#include <net/inet_timewait_sock.h> -#include <net/tcp_states.h> #include <uapi/linux/dccp.h> -enum dccp_state { - DCCP_OPEN = TCP_ESTABLISHED, - DCCP_REQUESTING = TCP_SYN_SENT, - DCCP_LISTEN = TCP_LISTEN, - DCCP_RESPOND = TCP_SYN_RECV, - /* - * States involved in closing a DCCP connection: - * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq. - * - * 2) CLOSING can have three different meanings (RFC 4340, 8.3): - * a. Client has performed active-close, has sent a Close to the server - * from state OPEN or PARTOPEN, and is waiting for the final Reset - * (in this case, SOCK_DONE == 1). - * b. Client is asked to perform passive-close, by receiving a CloseReq - * in (PART)OPEN state. It sends a Close and waits for final Reset - * (in this case, SOCK_DONE == 0). - * c. Server performs an active-close as in (a), keeps TIMEWAIT state. - * - * 3) The following intermediate states are employed to give passively - * closing nodes a chance to process their unread data: - * - PASSIVE_CLOSE (from OPEN => CLOSED) and - * - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above). - */ - DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1, - DCCP_PASSIVE_CLOSE = TCP_CLOSE_WAIT, /* any node receiving a Close */ - DCCP_CLOSING = TCP_CLOSING, - DCCP_TIME_WAIT = TCP_TIME_WAIT, - DCCP_CLOSED = TCP_CLOSE, - DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, - DCCP_PARTOPEN = TCP_MAX_STATES, - DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ - DCCP_MAX_STATES -}; - -enum { - DCCPF_OPEN = TCPF_ESTABLISHED, - DCCPF_REQUESTING = TCPF_SYN_SENT, - DCCPF_LISTEN = TCPF_LISTEN, - DCCPF_RESPOND = TCPF_SYN_RECV, - DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1, - DCCPF_CLOSING = TCPF_CLOSING, - DCCPF_TIME_WAIT = TCPF_TIME_WAIT, - DCCPF_CLOSED = TCPF_CLOSE, - DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, - DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), -}; - -static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) -{ - return (struct dccp_hdr *)skb_transport_header(skb); -} - -static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen) -{ - skb_push(skb, headlen); - skb_reset_transport_header(skb); - return memset(skb_transport_header(skb), 0, headlen); -} - static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh) { return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh)); @@ -85,12 +14,6 @@ static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); } -static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - return __dccp_basic_hdr_len(dh); -} - static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) { __u64 seq_nr = ntohs(dh->dccph_seq); @@ -103,222 +26,10 @@ static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) return seq_nr; } -static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) -{ - return (struct dccp_hdr_request *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); - return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low); -} - -static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) -{ - return (struct dccp_hdr_response *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - -static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) -{ - return (struct dccp_hdr_reset *)(skb_transport_header(skb) + - dccp_basic_hdr_len(skb)); -} - static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) { return __dccp_basic_hdr_len(dh) + dccp_packet_hdr_len(dh->dccph_type); } -static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) -{ - return __dccp_hdr_len(dccp_hdr(skb)); -} - -/** - * struct dccp_request_sock - represent DCCP-specific connection request - * @dreq_inet_rsk: structure inherited from - * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1) - * @dreq_gss: greatest sequence number sent (for retransmitted Responses) - * @dreq_isr: initial sequence number received in the first Request - * @dreq_gsr: greatest sequence number received (for retransmitted Request(s)) - * @dreq_service: service code present on the Request (there is just one) - * @dreq_featneg: feature negotiation options for this connection - * The following two fields are analogous to the ones in dccp_sock: - * @dreq_timestamp_echo: last received timestamp to echo (13.1) - * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo - */ -struct dccp_request_sock { - struct inet_request_sock dreq_inet_rsk; - __u64 dreq_iss; - __u64 dreq_gss; - __u64 dreq_isr; - __u64 dreq_gsr; - __be32 dreq_service; - spinlock_t dreq_lock; - struct list_head dreq_featneg; - __u32 dreq_timestamp_echo; - __u32 dreq_timestamp_time; -}; - -static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) -{ - return (struct dccp_request_sock *)req; -} - -extern struct inet_timewait_death_row dccp_death_row; - -extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, - struct sk_buff *skb); - -struct dccp_options_received { - u64 dccpor_ndp:48; - u32 dccpor_timestamp; - u32 dccpor_timestamp_echo; - u32 dccpor_elapsed_time; -}; - -struct ccid; - -enum dccp_role { - DCCP_ROLE_UNDEFINED, - DCCP_ROLE_LISTEN, - DCCP_ROLE_CLIENT, - DCCP_ROLE_SERVER, -}; - -struct dccp_service_list { - __u32 dccpsl_nr; - __be32 dccpsl_list[]; -}; - -#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) -#define DCCP_SERVICE_CODE_IS_ABSENT 0 - -static inline bool dccp_list_has_service(const struct dccp_service_list *sl, - const __be32 service) -{ - if (likely(sl != NULL)) { - u32 i = sl->dccpsl_nr; - while (i--) - if (sl->dccpsl_list[i] == service) - return true; - } - return false; -} - -struct dccp_ackvec; - -/** - * struct dccp_sock - DCCP socket state - * - * @dccps_swl - sequence number window low - * @dccps_swh - sequence number window high - * @dccps_awl - acknowledgement number window low - * @dccps_awh - acknowledgement number window high - * @dccps_iss - initial sequence number sent - * @dccps_isr - initial sequence number received - * @dccps_osr - first OPEN sequence number received - * @dccps_gss - greatest sequence number sent - * @dccps_gsr - greatest valid sequence number received - * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss - * @dccps_service - first (passive sock) or unique (active sock) service code - * @dccps_service_list - second .. last service code on passive socket - * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option - * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo - * @dccps_l_ack_ratio - feature-local Ack Ratio - * @dccps_r_ack_ratio - feature-remote Ack Ratio - * @dccps_l_seq_win - local Sequence Window (influences ack number validity) - * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) - * @dccps_pcslen - sender partial checksum coverage (via sockopt) - * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) - * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) - * @dccps_ndp_count - number of Non Data Packets since last data packet - * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) - * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) - * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) - * @dccps_hc_rx_ackvec - rx half connection ack vector - * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) - * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) - * @dccps_options_received - parsed set of retrieved options - * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy - * @dccps_tx_qlen - maximum length of the TX queue - * @dccps_role - role of this sock, one of %dccp_role - * @dccps_hc_rx_insert_options - receiver wants to add options when acking - * @dccps_hc_tx_insert_options - sender wants to add options when sending - * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) - * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" - * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets - * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) - * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) - */ -struct dccp_sock { - /* inet_connection_sock has to be the first member of dccp_sock */ - struct inet_connection_sock dccps_inet_connection; -#define dccps_syn_rtt dccps_inet_connection.icsk_ack.lrcvtime - __u64 dccps_swl; - __u64 dccps_swh; - __u64 dccps_awl; - __u64 dccps_awh; - __u64 dccps_iss; - __u64 dccps_isr; - __u64 dccps_osr; - __u64 dccps_gss; - __u64 dccps_gsr; - __u64 dccps_gar; - __be32 dccps_service; - __u32 dccps_mss_cache; - struct dccp_service_list *dccps_service_list; - __u32 dccps_timestamp_echo; - __u32 dccps_timestamp_time; - __u16 dccps_l_ack_ratio; - __u16 dccps_r_ack_ratio; - __u64 dccps_l_seq_win:48; - __u64 dccps_r_seq_win:48; - __u8 dccps_pcslen:4; - __u8 dccps_pcrlen:4; - __u8 dccps_send_ndp_count:1; - __u64 dccps_ndp_count:48; - unsigned long dccps_rate_last; - struct list_head dccps_featneg; - struct dccp_ackvec *dccps_hc_rx_ackvec; - struct ccid *dccps_hc_rx_ccid; - struct ccid *dccps_hc_tx_ccid; - struct dccp_options_received dccps_options_received; - __u8 dccps_qpolicy; - __u32 dccps_tx_qlen; - enum dccp_role dccps_role:2; - __u8 dccps_hc_rx_insert_options:1; - __u8 dccps_hc_tx_insert_options:1; - __u8 dccps_server_timewait:1; - __u8 dccps_sync_scheduled:1; - struct tasklet_struct dccps_xmitlet; - struct timer_list dccps_xmit_timer; -}; - -#define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \ - dccps_inet_connection.icsk_inet.sk) - -static inline const char *dccp_role(const struct sock *sk) -{ - switch (dccp_sk(sk)->dccps_role) { - case DCCP_ROLE_UNDEFINED: return "undefined"; - case DCCP_ROLE_LISTEN: return "listen"; - case DCCP_ROLE_SERVER: return "server"; - case DCCP_ROLE_CLIENT: return "client"; - } - return NULL; -} - -extern void dccp_syn_ack_timeout(const struct request_sock *req); - #endif /* _LINUX_DCCP_H */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 59444b495d49..fa2568b4380d 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -67,21 +67,23 @@ static const struct file_operations __fops = { \ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); -#if defined(CONFIG_DEBUG_FS) - -struct dentry *debugfs_lookup(const char *name, struct dentry *parent); - struct debugfs_short_fops { ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); loff_t (*llseek) (struct file *, loff_t, int); }; +#if defined(CONFIG_DEBUG_FS) + +struct dentry *debugfs_lookup(const char *name, struct dentry *parent); + struct dentry *debugfs_create_file_full(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct file_operations *fops); struct dentry *debugfs_create_file_short(const char *name, umode_t mode, struct dentry *parent, void *data, + const void *aux, const struct debugfs_short_fops *fops); /** @@ -126,7 +128,15 @@ struct dentry *debugfs_create_file_short(const char *name, umode_t mode, const struct debugfs_short_fops *: debugfs_create_file_short, \ struct file_operations *: debugfs_create_file_full, \ struct debugfs_short_fops *: debugfs_create_file_short) \ - (name, mode, parent, data, fops) + (name, mode, parent, data, NULL, fops) + +#define debugfs_create_file_aux(name, mode, parent, data, aux, fops) \ + _Generic(fops, \ + const struct file_operations *: debugfs_create_file_full, \ + const struct debugfs_short_fops *: debugfs_create_file_short, \ + struct file_operations *: debugfs_create_file_full, \ + struct debugfs_short_fops *: debugfs_create_file_short) \ + (name, mode, parent, data, aux, fops) struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode, struct dentry *parent, void *data, @@ -153,6 +163,7 @@ void debugfs_remove(struct dentry *dentry); void debugfs_lookup_and_remove(const char *name, struct dentry *parent); const struct file_operations *debugfs_real_fops(const struct file *filp); +const void *debugfs_get_aux(const struct file *file); int debugfs_file_get(struct dentry *dentry); void debugfs_file_put(struct dentry *dentry); @@ -164,8 +175,7 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, size_t len, loff_t *ppos); -struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, - struct dentry *new_dir, const char *new_name); +int debugfs_change_name(struct dentry *dentry, const char *fmt, ...) __printf(2, 3); void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value); @@ -259,6 +269,14 @@ static inline struct dentry *debugfs_lookup(const char *name, return ERR_PTR(-ENODEV); } +static inline struct dentry *debugfs_create_file_aux(const char *name, + umode_t mode, struct dentry *parent, + void *data, void *aux, + const void *fops) +{ + return ERR_PTR(-ENODEV); +} + static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const void *fops) @@ -312,6 +330,7 @@ static inline void debugfs_lookup_and_remove(const char *name, { } const struct file_operations *debugfs_real_fops(const struct file *filp); +void *debugfs_get_aux(const struct file *file); static inline int debugfs_file_get(struct dentry *dentry) { @@ -341,10 +360,10 @@ static inline ssize_t debugfs_attr_write_signed(struct file *file, return -ENODEV; } -static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, - struct dentry *new_dir, char *new_name) +static inline int __printf(2, 3) debugfs_change_name(struct dentry *dentry, + const char *fmt, ...) { - return ERR_PTR(-ENODEV); + return -ENODEV; } static inline void debugfs_create_u8(const char *name, umode_t mode, @@ -452,6 +471,11 @@ static inline ssize_t debugfs_read_file_str(struct file *file, #endif +#define debugfs_create_file_aux_num(name, mode, parent, data, n, fops) \ + debugfs_create_file_aux(name, mode, parent, data, \ + (void *)(unsigned long)n, fops) +#define debugfs_get_aux_num(f) (unsigned long)debugfs_get_aux(f) + /** * debugfs_create_xul - create a debugfs file that is used to read and write an * unsigned long value, formatted in hexadecimal diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6639f48dac36..800dcc360db2 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -29,25 +29,39 @@ struct task_delay_info { * XXX_delay contains the accumulated delay time in nanoseconds. */ u64 blkio_start; + u64 blkio_delay_max; + u64 blkio_delay_min; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; + u64 swapin_delay_max; + u64 swapin_delay_min; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ u32 swapin_count; /* total count of swapin */ u64 freepages_start; + u64 freepages_delay_max; + u64 freepages_delay_min; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; + u64 thrashing_delay_max; + u64 thrashing_delay_min; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; + u64 compact_delay_max; + u64 compact_delay_min; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; + u64 wpcopy_delay_max; + u64 wpcopy_delay_min; u64 wpcopy_delay; /* wait for write-protect copy */ + u64 irq_delay_max; + u64 irq_delay_min; u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8321f65897f3..cb95951547ab 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -93,7 +93,14 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, typedef int (*dm_message_fn) (struct dm_target *ti, unsigned int argc, char **argv, char *result, unsigned int maxlen); -typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); +/* + * Called with *forward == true. If it remains true, the ioctl should be + * forwarded to bdev. If it is reset to false, the target already fully handled + * the ioctl and the return value is the return value for the whole ioctl. + */ +typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev, + unsigned int cmd, unsigned long arg, + bool *forward); #ifdef CONFIG_BLK_DEV_ZONED typedef int (*dm_report_zones_fn) (struct dm_target *ti, @@ -299,6 +306,9 @@ struct target_type { #define dm_target_supports_mixed_zoned_model(type) (false) #endif +#define DM_TARGET_ATOMIC_WRITES 0x00000400 +#define dm_target_supports_atomic_writes(type) ((type)->features & DM_TARGET_ATOMIC_WRITES) + struct dm_target { struct dm_table *table; struct target_type *type; diff --git a/include/linux/device.h b/include/linux/device.h index 667cb6db9019..4940db137fff 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -26,9 +26,9 @@ #include <linux/atomic.h> #include <linux/uidgid.h> #include <linux/gfp.h> -#include <linux/overflow.h> #include <linux/device/bus.h> #include <linux/device/class.h> +#include <linux/device/devres.h> #include <linux/device/driver.h> #include <linux/cleanup.h> #include <asm/device.h> @@ -281,145 +281,6 @@ int __must_check device_create_bin_file(struct device *dev, void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -/* device resource management */ -typedef void (*dr_release_t)(struct device *dev, void *res); -typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); - -void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, - int nid, const char *name) __malloc; -#define devres_alloc(release, size, gfp) \ - __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) -#define devres_alloc_node(release, size, gfp, nid) \ - __devres_alloc_node(release, size, gfp, nid, #release) - -void devres_for_each_res(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data, - void (*fn)(struct device *, void *, void *), - void *data); -void devres_free(void *res); -void devres_add(struct device *dev, void *res); -void *devres_find(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); -void *devres_get(struct device *dev, void *new_res, - dr_match_t match, void *match_data); -void *devres_remove(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); -int devres_destroy(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); -int devres_release(struct device *dev, dr_release_t release, - dr_match_t match, void *match_data); - -/* devres group */ -void * __must_check devres_open_group(struct device *dev, void *id, gfp_t gfp); -void devres_close_group(struct device *dev, void *id); -void devres_remove_group(struct device *dev, void *id); -int devres_release_group(struct device *dev, void *id); - -/* managed devm_k.alloc/kfree for device drivers */ -void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2); -void *devm_krealloc(struct device *dev, void *ptr, size_t size, - gfp_t gfp) __must_check __realloc_size(3); -__printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, - const char *fmt, va_list ap) __malloc; -__printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, - const char *fmt, ...) __malloc; -static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) -{ - return devm_kmalloc(dev, size, gfp | __GFP_ZERO); -} -static inline void *devm_kmalloc_array(struct device *dev, - size_t n, size_t size, gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(n, size, &bytes))) - return NULL; - - return devm_kmalloc(dev, bytes, flags); -} -static inline void *devm_kcalloc(struct device *dev, - size_t n, size_t size, gfp_t flags) -{ - return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO); -} -static inline __realloc_size(3, 4) void * __must_check -devm_krealloc_array(struct device *dev, void *p, size_t new_n, size_t new_size, gfp_t flags) -{ - size_t bytes; - - if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) - return NULL; - - return devm_krealloc(dev, p, bytes, flags); -} - -void devm_kfree(struct device *dev, const void *p); -char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc; -const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp); -void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) - __realloc_size(3); - -unsigned long devm_get_free_pages(struct device *dev, - gfp_t gfp_mask, unsigned int order); -void devm_free_pages(struct device *dev, unsigned long addr); - -#ifdef CONFIG_HAS_IOMEM -void __iomem *devm_ioremap_resource(struct device *dev, - const struct resource *res); -void __iomem *devm_ioremap_resource_wc(struct device *dev, - const struct resource *res); - -void __iomem *devm_of_iomap(struct device *dev, - struct device_node *node, int index, - resource_size_t *size); -#else - -static inline -void __iomem *devm_ioremap_resource(struct device *dev, - const struct resource *res) -{ - return ERR_PTR(-EINVAL); -} - -static inline -void __iomem *devm_ioremap_resource_wc(struct device *dev, - const struct resource *res) -{ - return ERR_PTR(-EINVAL); -} - -static inline -void __iomem *devm_of_iomap(struct device *dev, - struct device_node *node, int index, - resource_size_t *size) -{ - return ERR_PTR(-EINVAL); -} - -#endif - -/* allows to add/remove a custom action to devres stack */ -void devm_remove_action(struct device *dev, void (*action)(void *), void *data); -void devm_release_action(struct device *dev, void (*action)(void *), void *data); - -int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(dev, action, data) \ - __devm_add_action(dev, action, data, #action) - -static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), - void *data, const char *name) -{ - int ret; - - ret = __devm_add_action(dev, action, data, name); - if (ret) - action(data); - - return ret; -} -#define devm_add_action_or_reset(dev, action, data) \ - __devm_add_action_or_reset(dev, action, data, #action) - /** * devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for @@ -1009,6 +870,15 @@ static inline bool dev_pm_test_driver_flags(struct device *dev, u32 flags) return !!(dev->power.driver_flags & flags); } +static inline bool dev_pm_smart_suspend(struct device *dev) +{ +#ifdef CONFIG_PM_SLEEP + return dev->power.smart_suspend; +#else + return false; +#endif +} + static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); @@ -1074,18 +944,44 @@ void device_del(struct device *dev); DEFINE_FREE(device_del, struct device *, if (_T) device_del(_T)) -int device_for_each_child(struct device *dev, void *data, - int (*fn)(struct device *dev, void *data)); -int device_for_each_child_reverse(struct device *dev, void *data, - int (*fn)(struct device *dev, void *data)); +int device_for_each_child(struct device *parent, void *data, + device_iter_t fn); +int device_for_each_child_reverse(struct device *parent, void *data, + device_iter_t fn); int device_for_each_child_reverse_from(struct device *parent, - struct device *from, const void *data, - int (*fn)(struct device *, const void *)); -struct device *device_find_child(struct device *dev, void *data, - int (*match)(struct device *dev, void *data)); -struct device *device_find_child_by_name(struct device *parent, - const char *name); -struct device *device_find_any_child(struct device *parent); + struct device *from, void *data, + device_iter_t fn); +struct device *device_find_child(struct device *parent, const void *data, + device_match_t match); +/** + * device_find_child_by_name - device iterator for locating a child device. + * @parent: parent struct device + * @name: name of the child device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a device that has the name @name. + * + * NOTE: you will need to drop the reference with put_device() after use. + */ +static inline struct device *device_find_child_by_name(struct device *parent, + const char *name) +{ + return device_find_child(parent, name, device_match_name); +} + +/** + * device_find_any_child - device iterator for locating a child device, if any. + * @parent: parent struct device + * + * This is similar to the device_find_child() function above, but it + * returns a reference to a child device, if any. + * + * NOTE: you will need to drop the reference with put_device() after use. + */ +static inline struct device *device_find_any_child(struct device *parent) +{ + return device_find_child(parent, NULL, device_match_any); +} int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, @@ -1149,6 +1045,8 @@ int device_online(struct device *dev); void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void device_set_node(struct device *dev, struct fwnode_handle *fwnode); +int device_add_of_node(struct device *dev, struct device_node *of_node); +void device_remove_of_node(struct device *dev); void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); static inline struct device_node *dev_of_node(struct device *dev) @@ -1226,7 +1124,7 @@ static inline void device_remove_group(struct device *dev, { const struct attribute_group *groups[] = { grp, NULL }; - return device_remove_groups(dev, groups); + device_remove_groups(dev, groups); } int __must_check devm_device_add_group(struct device *dev, diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index cdc4757217f9..f5a56efd2bd6 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -48,6 +48,7 @@ struct fwnode_handle; * will never get called until they do. * @remove: Called when a device removed from this bus. * @shutdown: Called at shut-down time to quiesce the device. + * @irq_get_affinity: Get IRQ affinity mask for the device on this bus. * * @online: Called to put the device back online (after offlining it). * @offline: Called to put the device offline for hot-removal. May fail. @@ -87,6 +88,8 @@ struct bus_type { void (*sync_state)(struct device *dev); void (*remove)(struct device *dev); void (*shutdown)(struct device *dev); + const struct cpumask *(*irq_get_affinity)(struct device *dev, + unsigned int irq_vec); int (*online)(struct device *dev); int (*offline)(struct device *dev); @@ -131,6 +134,7 @@ typedef int (*device_match_t)(struct device *dev, const void *data); /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); +int device_match_type(struct device *dev, const void *type); int device_match_of_node(struct device *dev, const void *np); int device_match_fwnode(struct device *dev, const void *fwnode); int device_match_devt(struct device *dev, const void *pdevt); @@ -138,9 +142,12 @@ int device_match_acpi_dev(struct device *dev, const void *adev); int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); +/* Device iterating function type for various driver core for_each APIs */ +typedef int (*device_iter_t)(struct device *dev, void *data); + /* iterator helpers for buses */ -int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, - int (*fn)(struct device *dev, void *data)); +int bus_for_each_dev(const struct bus_type *bus, struct device *start, + void *data, device_iter_t fn); struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, device_match_t match); /** diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 518c9c83d64b..65880e60c720 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -82,18 +82,16 @@ bool class_is_registered(const struct class *class); struct class_compat; struct class_compat *class_compat_register(const char *name); void class_compat_unregister(struct class_compat *cls); -int class_compat_create_link(struct class_compat *cls, struct device *dev, - struct device *device_link); -void class_compat_remove_link(struct class_compat *cls, struct device *dev, - struct device *device_link); +int class_compat_create_link(struct class_compat *cls, struct device *dev); +void class_compat_remove_link(struct class_compat *cls, struct device *dev); void class_dev_iter_init(struct class_dev_iter *iter, const struct class *class, const struct device *start, const struct device_type *type); struct device *class_dev_iter_next(struct class_dev_iter *iter); void class_dev_iter_exit(struct class_dev_iter *iter); -int class_for_each_device(const struct class *class, const struct device *start, void *data, - int (*fn)(struct device *dev, void *data)); +int class_for_each_device(const struct class *class, const struct device *start, + void *data, device_iter_t fn); struct device *class_find_device(const struct class *class, const struct device *start, const void *data, device_match_t match); @@ -195,7 +193,7 @@ static inline int __must_check class_create_file(const struct class *class, static inline void class_remove_file(const struct class *class, const struct class_attribute *attr) { - return class_remove_file_ns(class, attr, NULL); + class_remove_file_ns(class, attr, NULL); } /* Simple class attribute that is just a static string */ diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h new file mode 100644 index 000000000000..ae696d10faff --- /dev/null +++ b/include/linux/device/devres.h @@ -0,0 +1,170 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DEVICE_DEVRES_H_ +#define _DEVICE_DEVRES_H_ + +#include <linux/err.h> +#include <linux/gfp_types.h> +#include <linux/numa.h> +#include <linux/overflow.h> +#include <linux/stdarg.h> +#include <linux/types.h> +#include <asm/bug.h> + +struct device; +struct device_node; +struct resource; + +/* device resource management */ +typedef void (*dr_release_t)(struct device *dev, void *res); +typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); + +void * __malloc +__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, const char *name); +#define devres_alloc(release, size, gfp) \ + __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) +#define devres_alloc_node(release, size, gfp, nid) \ + __devres_alloc_node(release, size, gfp, nid, #release) + +void devres_for_each_res(struct device *dev, dr_release_t release, + dr_match_t match, void *match_data, + void (*fn)(struct device *, void *, void *), + void *data); +void devres_free(void *res); +void devres_add(struct device *dev, void *res); +void *devres_find(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); +void *devres_get(struct device *dev, void *new_res, dr_match_t match, void *match_data); +void *devres_remove(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); +int devres_destroy(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); +int devres_release(struct device *dev, dr_release_t release, dr_match_t match, void *match_data); + +/* devres group */ +void * __must_check devres_open_group(struct device *dev, void *id, gfp_t gfp); +void devres_close_group(struct device *dev, void *id); +void devres_remove_group(struct device *dev, void *id); +int devres_release_group(struct device *dev, void *id); + +/* managed devm_k.alloc/kfree for device drivers */ +void * __alloc_size(2) +devm_kmalloc(struct device *dev, size_t size, gfp_t gfp); +void * __must_check __realloc_size(3) +devm_krealloc(struct device *dev, void *ptr, size_t size, gfp_t gfp); +static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) +{ + return devm_kmalloc(dev, size, gfp | __GFP_ZERO); +} +static inline void *devm_kmalloc_array(struct device *dev, size_t n, size_t size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + return devm_kmalloc(dev, bytes, flags); +} +static inline void *devm_kcalloc(struct device *dev, size_t n, size_t size, gfp_t flags) +{ + return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO); +} +static inline __realloc_size(3, 4) void * __must_check +devm_krealloc_array(struct device *dev, void *p, size_t new_n, size_t new_size, gfp_t flags) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return devm_krealloc(dev, p, bytes, flags); +} + +void devm_kfree(struct device *dev, const void *p); + +void * __realloc_size(3) +devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); +static inline void *devm_kmemdup_array(struct device *dev, const void *src, + size_t n, size_t size, gfp_t flags) +{ + return devm_kmemdup(dev, src, size_mul(size, n), flags); +} + +char * __malloc +devm_kstrdup(struct device *dev, const char *s, gfp_t gfp); +const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp); +char * __printf(3, 0) __malloc +devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, va_list ap); +char * __printf(3, 4) __malloc +devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...); + +unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); +void devm_free_pages(struct device *dev, unsigned long addr); + +#ifdef CONFIG_HAS_IOMEM + +void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res); +void __iomem *devm_ioremap_resource_wc(struct device *dev, const struct resource *res); + +void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, + resource_size_t *size); +#else + +static inline +void __iomem *devm_ioremap_resource(struct device *dev, const struct resource *res) +{ + return IOMEM_ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_ioremap_resource_wc(struct device *dev, const struct resource *res) +{ + return IOMEM_ERR_PTR(-EINVAL); +} + +static inline +void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, + resource_size_t *size) +{ + return IOMEM_ERR_PTR(-EINVAL); +} + +#endif + +/* allows to add/remove a custom action to devres stack */ +int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); + +/** + * devm_remove_action() - removes previously added custom action + * @dev: Device that owns the action + * @action: Function implementing the action + * @data: Pointer to data passed to @action implementation + * + * Removes instance of @action previously added by devm_add_action(). + * Both action and data should match one of the existing entries. + */ +static inline +void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +{ + WARN_ON(devm_remove_action_nowarn(dev, action, data)); +} + +void devm_release_action(struct device *dev, void (*action)(void *), void *data); + +int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) + +static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), + void *data, const char *name) +{ + int ret; + + ret = __devm_add_action(dev, action, data, name); + if (ret) + action(data); + + return ret; +} +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) + +bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data); + +#endif /* _DEVICE_DEVRES_H_ */ diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 5c04b8e3833b..cd8e0f0a634b 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -154,7 +154,7 @@ void driver_remove_file(const struct device_driver *driver, int driver_set_override(struct device *dev, const char **override, const char *s, size_t len); int __must_check driver_for_each_device(struct device_driver *drv, struct device *start, - void *data, int (*fn)(struct device *dev, void *)); + void *data, device_iter_t fn); struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, device_match_t match); diff --git a/include/linux/device/faux.h b/include/linux/device/faux.h new file mode 100644 index 000000000000..9f43c0e46aa4 --- /dev/null +++ b/include/linux/device/faux.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 Greg Kroah-Hartman <gregkh@linuxfoundation.org> + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#ifndef _FAUX_DEVICE_H_ +#define _FAUX_DEVICE_H_ + +#include <linux/container_of.h> +#include <linux/device.h> + +/** + * struct faux_device - a "faux" device + * @dev: internal struct device of the object + * + * A simple faux device that can be created/destroyed. To be used when a + * driver only needs to have a device to "hang" something off. This can be + * used for downloading firmware or other basic tasks. Use this instead of + * a struct platform_device if the device has no resources assigned to + * it at all. + */ +struct faux_device { + struct device dev; +}; +#define to_faux_device(x) container_of_const((x), struct faux_device, dev) + +/** + * struct faux_device_ops - a set of callbacks for a struct faux_device + * @probe: called when a faux device is probed by the driver core + * before the device is fully bound to the internal faux bus + * code. If probe succeeds, return 0, otherwise return a + * negative error number to stop the probe sequence from + * succeeding. + * @remove: called when a faux device is removed from the system + * + * Both @probe and @remove are optional, if not needed, set to NULL. + */ +struct faux_device_ops { + int (*probe)(struct faux_device *faux_dev); + void (*remove)(struct faux_device *faux_dev); +}; + +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops); +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups); +void faux_device_destroy(struct faux_device *faux_dev); + +static inline void *faux_device_get_drvdata(const struct faux_device *faux_dev) +{ + return dev_get_drvdata(&faux_dev->dev); +} + +static inline void faux_device_set_drvdata(struct faux_device *faux_dev, void *data) +{ + dev_set_drvdata(&faux_dev->dev, data); +} + +#endif /* _FAUX_DEVICE_H_ */ diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index d02f32b7514e..0864773a57e8 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -18,15 +18,16 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; + if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))) + return 0; + if (likely(!inode->i_rdev)) return 0; if (S_ISBLK(inode->i_mode)) type = DEVCG_DEV_BLOCK; - else if (S_ISCHR(inode->i_mode)) + else /* S_ISCHR by the test above */ type = DEVCG_DEV_CHAR; - else - return 0; if (mask & MAY_WRITE) access |= DEVCG_ACC_WRITE; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..d58e329ac0e7 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -35,15 +35,6 @@ struct dma_buf_attachment; */ struct dma_buf_ops { /** - * @cache_sgt_mapping: - * - * If true the framework will cache the first mapping made for each - * attachment. This avoids creating mappings for attachments multiple - * times. - */ - bool cache_sgt_mapping; - - /** * @attach: * * This is called from dma_buf_attach() to make sure that a given @@ -370,10 +361,8 @@ struct dma_buf { */ struct module *owner; -#if IS_ENABLED(CONFIG_DEBUG_FS) /** @list_node: node for dma_buf accounting and debugging. */ struct list_head list_node; -#endif /** @priv: exporter specific private data for this buffer object. */ void *priv; @@ -493,8 +482,6 @@ struct dma_buf_attach_ops { * @dmabuf: buffer for this attachment. * @dev: device attached to the buffer. * @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf. - * @sgt: cached mapping. - * @dir: direction of cached mapping. * @peer2peer: true if the importer can handle peer resources without pages. * @priv: exporter specific attachment data. * @importer_ops: importer operations for this attachment, if provided @@ -514,8 +501,6 @@ struct dma_buf_attachment { struct dma_buf *dmabuf; struct device *dev; struct list_head node; - struct sg_table *sgt; - enum dma_data_direction dir; bool peer2peer; const struct dma_buf_attach_ops *importer_ops; void *importer_priv; @@ -583,20 +568,6 @@ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) return !!dmabuf->ops->pin; } -/** - * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic - * mappings - * @attach: the DMA-buf attachment to check - * - * Returns true if a DMA-buf importer wants to call the map/unmap functions with - * the dma_resv lock held. - */ -static inline bool -dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach) -{ - return !!attach->importer_ops; -} - struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); struct dma_buf_attachment * @@ -636,4 +607,6 @@ int dma_buf_vmap(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap(struct dma_buf *dmabuf, struct iosys_map *map); int dma_buf_vmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); void dma_buf_vunmap_unlocked(struct dma_buf *dmabuf, struct iosys_map *map); +struct dma_buf *dma_buf_iter_begin(void); +struct dma_buf *dma_buf_iter_next(struct dma_buf *dmbuf); #endif /* __DMA_BUF_H__ */ diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index d7e30d4f7503..f3bc0bcd7098 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -78,14 +78,18 @@ static inline dma_addr_t dma_range_map_max(const struct bus_dma_region *map) #define phys_to_dma_unencrypted phys_to_dma #endif #else -static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, - phys_addr_t paddr) +static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { if (dev->dma_range_map) return translate_phys_to_dma(dev, paddr); return paddr; } +static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, + phys_addr_t paddr) +{ + return dma_addr_unencrypted(__phys_to_dma(dev, paddr)); +} /* * If memory encryption is supported, phys_to_dma will set the memory encryption * bit in the DMA address, and dma_to_phys will clear it. @@ -94,19 +98,20 @@ static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, */ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - return __sme_set(phys_to_dma_unencrypted(dev, paddr)); + return dma_addr_encrypted(__phys_to_dma(dev, paddr)); } static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { phys_addr_t paddr; + dma_addr = dma_addr_canonical(dma_addr); if (dev->dma_range_map) paddr = translate_dma_to_phys(dev, dma_addr); else paddr = dma_addr; - return __sme_clr(paddr); + return paddr; } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index 66b1e56fbb81..62df222fe0f1 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -52,6 +52,8 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, struct dma_fence_unwrap *cursors); +int dma_fence_dedup_array(struct dma_fence **array, int num_fences); + /** * dma_fence_unwrap_merge - unwrap and merge fences * diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index e7ad819962e3..b12776883d14 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -169,8 +169,8 @@ struct dma_fence_ops { * implementation know that there is another driver waiting on the * signal (ie. hw->sw case). * - * This function can be called from atomic context, but not - * from irq context, so normal spinlocks can be used. + * This is called with irq's disabled, so only spinlocks which disable + * IRQ's can be used in the code outside of this callback. * * A return value of false indicates the fence already passed, * or some failure occurred that made it impossible to enable @@ -239,27 +239,6 @@ struct dma_fence_ops { void (*release)(struct dma_fence *fence); /** - * @fence_value_str: - * - * Callback to fill in free-form debug info specific to this fence, like - * the sequence number. - * - * This callback is optional. - */ - void (*fence_value_str)(struct dma_fence *fence, char *str, int size); - - /** - * @timeline_value_str: - * - * Fills in the current value of the timeline as a string, like the - * sequence number. Note that the specific fence passed to this function - * should not matter, drivers should only use it to look up the - * corresponding timeline structures. - */ - void (*timeline_value_str)(struct dma_fence *fence, - char *str, int size); - - /** * @set_deadline: * * Callback to allow a fence waiter to inform the fence signaler of diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index e172522cd936..f48e5fb88bd5 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -434,58 +434,4 @@ static inline void debug_dma_dump_mappings(struct device *dev) #endif /* CONFIG_DMA_API_DEBUG */ extern const struct dma_map_ops dma_dummy_ops; - -enum pci_p2pdma_map_type { - /* - * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping - * type hasn't been calculated yet. Functions that return this enum - * never return this value. - */ - PCI_P2PDMA_MAP_UNKNOWN = 0, - - /* - * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will - * traverse the host bridge and the host bridge is not in the - * allowlist. DMA Mapping routines should return an error when - * this is returned. - */ - PCI_P2PDMA_MAP_NOT_SUPPORTED, - - /* - * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to - * each other directly through a PCI switch and the transaction will - * not traverse the host bridge. Such a mapping should program - * the DMA engine with PCI bus addresses. - */ - PCI_P2PDMA_MAP_BUS_ADDR, - - /* - * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk - * to each other, but the transaction traverses a host bridge on the - * allowlist. In this case, a normal mapping either with CPU physical - * addresses (in the case of dma-direct) or IOVA addresses (in the - * case of IOMMUs) should be used to program the DMA engine. - */ - PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, -}; - -struct pci_p2pdma_map_state { - struct dev_pagemap *pgmap; - int map; - u64 bus_off; -}; - -#ifdef CONFIG_PCI_P2PDMA -enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg); -#else /* CONFIG_PCI_P2PDMA */ -static inline enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg) -{ - return PCI_P2PDMA_MAP_NOT_SUPPORTED; -} -#endif /* CONFIG_PCI_P2PDMA */ - #endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index b79925b1c433..55c03e5fe8cb 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -72,6 +72,22 @@ #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +struct dma_iova_state { + dma_addr_t addr; + u64 __size; +}; + +/* + * Use the high bit to mark if we used swiotlb for one or more ranges. + */ +#define DMA_IOVA_USE_SWIOTLB (1ULL << 63) + +static inline size_t dma_iova_size(struct dma_iova_state *state) +{ + /* Casting is needed for 32-bits systems */ + return (size_t)(state->__size & ~DMA_IOVA_USE_SWIOTLB); +} + #ifdef CONFIG_DMA_API_DEBUG void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); void debug_dma_map_single(struct device *dev, const void *addr, @@ -277,6 +293,70 @@ static inline int dma_mmap_noncontiguous(struct device *dev, } #endif /* CONFIG_HAS_DMA */ +#ifdef CONFIG_IOMMU_DMA +/** + * dma_use_iova - check if the IOVA API is used for this state + * @state: IOVA state + * + * Return %true if the DMA transfers uses the dma_iova_*() calls or %false if + * they can't be used. + */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return state->__size != 0; +} + +bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t size); +void dma_iova_free(struct device *dev, struct dma_iova_state *state); +void dma_iova_destroy(struct device *dev, struct dma_iova_state *state, + size_t mapped_len, enum dma_data_direction dir, + unsigned long attrs); +int dma_iova_sync(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size); +int dma_iova_link(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_iova_unlink(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +#else /* CONFIG_IOMMU_DMA */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return false; +} +static inline bool dma_iova_try_alloc(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t size) +{ + return false; +} +static inline void dma_iova_free(struct device *dev, + struct dma_iova_state *state) +{ +} +static inline void dma_iova_destroy(struct device *dev, + struct dma_iova_state *state, size_t mapped_len, + enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline int dma_iova_sync(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size) +{ + return -EOPNOTSUPP; +} +static inline int dma_iova_link(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t offset, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return -EOPNOTSUPP; +} +static inline void dma_iova_unlink(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ +} +#endif /* CONFIG_IOMMU_DMA */ + #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); @@ -326,6 +406,7 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false; } +bool dma_need_unmap(struct device *dev); #else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ static inline bool dma_dev_need_sync(const struct device *dev) { @@ -351,6 +432,10 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; } +static inline bool dma_need_unmap(struct device *dev) +{ + return false; +} #endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ struct page *dma_alloc_pages(struct device *dev, size_t size, @@ -629,10 +714,14 @@ static inline int dma_mmap_wc(struct device *dev, #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) -#define dma_unmap_addr(PTR, ADDR_NAME) (0) -#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define dma_unmap_len(PTR, LEN_NAME) (0) -#define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) +#define dma_unmap_addr(PTR, ADDR_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) +#define dma_unmap_len(PTR, LEN_NAME) \ + ({ typeof(PTR) __p __maybe_unused = PTR; 0; }) +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) \ + do { typeof(PTR) __p __maybe_unused = PTR; } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/include/linux/dma/k3-udma-glue.h b/include/linux/dma/k3-udma-glue.h index 2dea217629d0..5d43881e6fb7 100644 --- a/include/linux/dma/k3-udma-glue.h +++ b/include/linux/dma/k3-udma-glue.h @@ -138,8 +138,7 @@ int k3_udma_glue_rx_get_irq(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num); void k3_udma_glue_reset_rx_chn(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_num, void *data, - void (*cleanup)(void *data, dma_addr_t desc_dma), - bool skip_fdq); + void (*cleanup)(void *data, dma_addr_t desc_dma)); int k3_udma_glue_rx_flow_enable(struct k3_udma_glue_rx_channel *rx_chn, u32 flow_idx); int k3_udma_glue_rx_flow_disable(struct k3_udma_glue_rx_channel *rx_chn, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 346251bf1026..bb146c5ac3e4 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -839,7 +839,6 @@ struct dma_filter { * The function takes a buffer of size buf_len. The callback function will * be called after period_len bytes have been transferred. * @device_prep_interleaved_dma: Transfer expression in a generic way. - * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address * @device_caps: May be used to override the generic DMA slave capabilities * with per-channel specific ones * @device_config: Pushes a new configuration to a channel, return 0 or an error @@ -942,9 +941,6 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)( - struct dma_chan *chan, dma_addr_t dst, u64 data, - unsigned long flags); void (*device_caps)(struct dma_chan *chan, struct dma_slave_caps *caps); int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config); @@ -1639,14 +1635,14 @@ static inline struct dma_chan { struct dma_chan *chan; - chan = dma_request_slave_channel(dev, name); - if (chan) + chan = dma_request_chan(dev, name); + if (!IS_ERR(chan)) return chan; if (!fn || !fn_param) return NULL; - return __dma_request_channel(&mask, fn, fn_param, NULL); + return dma_request_channel(mask, fn, fn_param); } static inline char * diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index f632ecfb4238..06c4de602b2f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -11,6 +11,7 @@ #ifndef LINUX_DMAPOOL_H #define LINUX_DMAPOOL_H +#include <linux/nodemask_types.h> #include <linux/scatterlist.h> #include <asm/io.h> @@ -18,8 +19,8 @@ struct device; #ifdef CONFIG_HAS_DMA -struct dma_pool *dma_pool_create(const char *name, struct device *dev, - size_t size, size_t align, size_t allocation); +struct dma_pool *dma_pool_create_node(const char *name, struct device *dev, + size_t size, size_t align, size_t boundary, int node); void dma_pool_destroy(struct dma_pool *pool); @@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev, void dmam_pool_destroy(struct dma_pool *pool); #else /* !CONFIG_HAS_DMA */ -static inline struct dma_pool *dma_pool_create(const char *name, - struct device *dev, size_t size, size_t align, size_t allocation) -{ return NULL; } +static inline struct dma_pool *dma_pool_create_node(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary, + int node) +{ + return NULL; +} static inline void dma_pool_destroy(struct dma_pool *pool) { } static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { return NULL; } @@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name, static inline void dmam_pool_destroy(struct dma_pool *pool) { } #endif /* !CONFIG_HAS_DMA */ +static inline struct dma_pool *dma_pool_create(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary) +{ + return dma_pool_create_node(name, dev, size, align, boundary, + NUMA_NO_NODE); +} + static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { diff --git a/include/linux/edac.h b/include/linux/edac.h index b4ee8961e623..fa32f2aca22f 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -661,4 +661,226 @@ static inline struct dimm_info *edac_get_dimm(struct mem_ctl_info *mci, return mci->dimms[index]; } + +#define EDAC_FEAT_NAME_LEN 128 + +/* RAS feature type */ +enum edac_dev_feat { + RAS_FEAT_SCRUB, + RAS_FEAT_ECS, + RAS_FEAT_MEM_REPAIR, + RAS_FEAT_MAX +}; + +/** + * struct edac_scrub_ops - scrub device operations (all elements optional) + * @read_addr: read base address of scrubbing range. + * @read_size: read offset of scrubbing range. + * @write_addr: set base address of the scrubbing range. + * @write_size: set offset of the scrubbing range. + * @get_enabled_bg: check if currently performing background scrub. + * @set_enabled_bg: start or stop a bg-scrub. + * @get_min_cycle: get minimum supported scrub cycle duration in seconds. + * @get_max_cycle: get maximum supported scrub cycle duration in seconds. + * @get_cycle_duration: get current scrub cycle duration in seconds. + * @set_cycle_duration: set current scrub cycle duration in seconds. + */ +struct edac_scrub_ops { + int (*read_addr)(struct device *dev, void *drv_data, u64 *base); + int (*read_size)(struct device *dev, void *drv_data, u64 *size); + int (*write_addr)(struct device *dev, void *drv_data, u64 base); + int (*write_size)(struct device *dev, void *drv_data, u64 size); + int (*get_enabled_bg)(struct device *dev, void *drv_data, bool *enable); + int (*set_enabled_bg)(struct device *dev, void *drv_data, bool enable); + int (*get_min_cycle)(struct device *dev, void *drv_data, u32 *min); + int (*get_max_cycle)(struct device *dev, void *drv_data, u32 *max); + int (*get_cycle_duration)(struct device *dev, void *drv_data, u32 *cycle); + int (*set_cycle_duration)(struct device *dev, void *drv_data, u32 cycle); +}; + +#if IS_ENABLED(CONFIG_EDAC_SCRUB) +int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_scrub_get_desc(struct device *scrub_dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_SCRUB */ + +/** + * struct edac_ecs_ops - ECS device operations (all elements optional) + * @get_log_entry_type: read the log entry type value. + * @set_log_entry_type: set the log entry type value. + * @get_mode: read the mode value. + * @set_mode: set the mode value. + * @reset: reset the ECS counter. + * @get_threshold: read the threshold count per gigabits of memory cells. + * @set_threshold: set the threshold count per gigabits of memory cells. + */ +struct edac_ecs_ops { + int (*get_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_log_entry_type)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_mode)(struct device *dev, void *drv_data, int fru_id, u32 *val); + int (*set_mode)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*reset)(struct device *dev, void *drv_data, int fru_id, u32 val); + int (*get_threshold)(struct device *dev, void *drv_data, int fru_id, u32 *threshold); + int (*set_threshold)(struct device *dev, void *drv_data, int fru_id, u32 threshold); +}; + +struct edac_ecs_ex_info { + u16 num_media_frus; +}; + +#if IS_ENABLED(CONFIG_EDAC_ECS) +int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus); +#else +static inline int edac_ecs_get_desc(struct device *ecs_dev, + const struct attribute_group **attr_groups, + u16 num_media_frus) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_ECS */ + +enum edac_mem_repair_type { + EDAC_REPAIR_PPR, + EDAC_REPAIR_CACHELINE_SPARING, + EDAC_REPAIR_ROW_SPARING, + EDAC_REPAIR_BANK_SPARING, + EDAC_REPAIR_RANK_SPARING, + EDAC_REPAIR_MAX +}; + +extern const char * const edac_repair_type[]; + +enum edac_mem_repair_cmd { + EDAC_DO_MEM_REPAIR = 1, +}; + +/** + * struct edac_mem_repair_ops - memory repair operations + * (all elements are optional except do_repair, set_hpa/set_dpa) + * @get_repair_type: get the memory repair type, listed in + * enum edac_mem_repair_function. + * @get_persist_mode: get the current persist mode. + * false - Soft repair type (temporary repair). + * true - Hard memory repair type (permanent repair). + * @set_persist_mode: set the persist mode of the memory repair instance. + * @get_repair_safe_when_in_use: get whether memory media is accessible and + * data is retained during repair operation. + * @get_hpa: get current host physical address (HPA) of memory to repair. + * @set_hpa: set host physical address (HPA) of memory to repair. + * @get_min_hpa: get the minimum supported host physical address (HPA). + * @get_max_hpa: get the maximum supported host physical address (HPA). + * @get_dpa: get current device physical address (DPA) of memory to repair. + * @set_dpa: set device physical address (DPA) of memory to repair. + * In some states of system configuration (e.g. before address decoders + * have been configured), memory devices (e.g. CXL) may not have an active + * mapping in the host physical address map. As such, the memory + * to repair must be identified by a device specific physical addressing + * scheme using a device physical address(DPA). The DPA and other control + * attributes to use for the repair operations will be presented in related + * error records. + * @get_min_dpa: get the minimum supported device physical address (DPA). + * @get_max_dpa: get the maximum supported device physical address (DPA). + * @get_nibble_mask: get current nibble mask of memory to repair. + * @set_nibble_mask: set nibble mask of memory to repair. + * @get_bank_group: get current bank group of memory to repair. + * @set_bank_group: set bank group of memory to repair. + * @get_bank: get current bank of memory to repair. + * @set_bank: set bank of memory to repair. + * @get_rank: get current rank of memory to repair. + * @set_rank: set rank of memory to repair. + * @get_row: get current row of memory to repair. + * @set_row: set row of memory to repair. + * @get_column: get current column of memory to repair. + * @set_column: set column of memory to repair. + * @get_channel: get current channel of memory to repair. + * @set_channel: set channel of memory to repair. + * @get_sub_channel: get current subchannel of memory to repair. + * @set_sub_channel: set subchannel of memory to repair. + * @do_repair: Issue memory repair operation for the HPA/DPA and + * other control attributes set for the memory to repair. + * + * All elements are optional except do_repair and at least one of set_hpa/set_dpa. + */ +struct edac_mem_repair_ops { + int (*get_repair_type)(struct device *dev, void *drv_data, const char **type); + int (*get_persist_mode)(struct device *dev, void *drv_data, bool *persist); + int (*set_persist_mode)(struct device *dev, void *drv_data, bool persist); + int (*get_repair_safe_when_in_use)(struct device *dev, void *drv_data, bool *safe); + int (*get_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*set_hpa)(struct device *dev, void *drv_data, u64 hpa); + int (*get_min_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_max_hpa)(struct device *dev, void *drv_data, u64 *hpa); + int (*get_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*set_dpa)(struct device *dev, void *drv_data, u64 dpa); + int (*get_min_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa); + int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val); + int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val); + int (*get_bank_group)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank_group)(struct device *dev, void *drv_data, u32 val); + int (*get_bank)(struct device *dev, void *drv_data, u32 *val); + int (*set_bank)(struct device *dev, void *drv_data, u32 val); + int (*get_rank)(struct device *dev, void *drv_data, u32 *val); + int (*set_rank)(struct device *dev, void *drv_data, u32 val); + int (*get_row)(struct device *dev, void *drv_data, u32 *val); + int (*set_row)(struct device *dev, void *drv_data, u32 val); + int (*get_column)(struct device *dev, void *drv_data, u32 *val); + int (*set_column)(struct device *dev, void *drv_data, u32 val); + int (*get_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_channel)(struct device *dev, void *drv_data, u32 val); + int (*get_sub_channel)(struct device *dev, void *drv_data, u32 *val); + int (*set_sub_channel)(struct device *dev, void *drv_data, u32 val); + int (*do_repair)(struct device *dev, void *drv_data, u32 val); +}; + +#if IS_ENABLED(CONFIG_EDAC_MEM_REPAIR) +int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance); +#else +static inline int edac_mem_repair_get_desc(struct device *dev, + const struct attribute_group **attr_groups, + u8 instance) +{ return -EOPNOTSUPP; } +#endif /* CONFIG_EDAC_MEM_REPAIR */ + +/* EDAC device feature information structure */ +struct edac_dev_data { + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + const struct edac_mem_repair_ops *mem_repair_ops; + }; + u8 instance; + void *private; +}; + +struct edac_dev_feat_ctx { + struct device dev; + void *private; + struct edac_dev_data *scrub; + struct edac_dev_data ecs; + struct edac_dev_data *mem_repair; +}; + +struct edac_dev_feature { + enum edac_dev_feat ft_type; + u8 instance; + union { + const struct edac_scrub_ops *scrub_ops; + const struct edac_ecs_ops *ecs_ops; + const struct edac_mem_repair_ops *mem_repair_ops; + }; + void *ctx; + struct edac_ecs_ex_info ecs_info; +}; + +int edac_dev_register(struct device *parent, char *dev_name, + void *parent_pvt_data, int num_features, + const struct edac_dev_feature *ras_features); #endif /* _LINUX_EDAC_H_ */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 8bcd629ee250..7d63d1d75f22 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -114,22 +114,22 @@ typedef struct { #define EFI_MAX_MEMORY_TYPE 16 /* Attribute values: */ -#define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */ -#define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */ -#define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */ -#define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */ -#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */ -#define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ -#define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ -#define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ -#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */ -#define EFI_MEMORY_MORE_RELIABLE \ - ((u64)0x0000000000010000ULL) /* higher reliability */ -#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ -#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ -#define EFI_MEMORY_CPU_CRYPTO ((u64)0x0000000000080000ULL) /* supports encryption */ +#define EFI_MEMORY_UC BIT_ULL(0) /* uncached */ +#define EFI_MEMORY_WC BIT_ULL(1) /* write-coalescing */ +#define EFI_MEMORY_WT BIT_ULL(2) /* write-through */ +#define EFI_MEMORY_WB BIT_ULL(3) /* write-back */ +#define EFI_MEMORY_UCE BIT_ULL(4) /* uncached, exported */ +#define EFI_MEMORY_WP BIT_ULL(12) /* write-protect */ +#define EFI_MEMORY_RP BIT_ULL(13) /* read-protect */ +#define EFI_MEMORY_XP BIT_ULL(14) /* execute-protect */ +#define EFI_MEMORY_NV BIT_ULL(15) /* non-volatile */ +#define EFI_MEMORY_MORE_RELIABLE BIT_ULL(16) /* higher reliability */ +#define EFI_MEMORY_RO BIT_ULL(17) /* read-only */ +#define EFI_MEMORY_SP BIT_ULL(18) /* soft reserved */ +#define EFI_MEMORY_CPU_CRYPTO BIT_ULL(19) /* supports encryption */ #define EFI_MEMORY_HOT_PLUGGABLE BIT_ULL(20) /* supports unplugging at runtime */ -#define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ +#define EFI_MEMORY_RUNTIME BIT_ULL(63) /* range requires runtime mapping */ + #define EFI_MEMORY_DESCRIPTOR_VERSION 1 #define EFI_PAGE_SHIFT 12 @@ -364,7 +364,6 @@ void efi_native_runtime_setup(void); #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) -#define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) #define LINUX_EFI_CRASH_GUID EFI_GUID(0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0) @@ -374,7 +373,6 @@ void efi_native_runtime_setup(void); #define EFI_DEVICE_PATH_TO_TEXT_PROTOCOL_GUID EFI_GUID(0x8b843e20, 0x8132, 0x4852, 0x90, 0xcc, 0x55, 0x1a, 0x4e, 0x4a, 0x7f, 0x1c) #define EFI_DEVICE_PATH_FROM_TEXT_PROTOCOL_GUID EFI_GUID(0x05c99a21, 0xc70f, 0x4ad2, 0x8a, 0x5f, 0x35, 0xdf, 0x33, 0x43, 0xf5, 0x1e) #define EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID EFI_GUID(0x9042a9de, 0x23dc, 0x4a38, 0x96, 0xfb, 0x7a, 0xde, 0xd0, 0x80, 0x51, 0x6a) -#define EFI_UGA_PROTOCOL_GUID EFI_GUID(0x982c298b, 0xf4fa, 0x41cb, 0xb8, 0x38, 0x77, 0xaa, 0x68, 0x8f, 0xb8, 0x39) #define EFI_PCI_IO_PROTOCOL_GUID EFI_GUID(0x4cf5b200, 0x68b8, 0x4ca5, 0x9e, 0xec, 0xb2, 0x3e, 0x3f, 0x50, 0x02, 0x9a) #define EFI_FILE_INFO_ID EFI_GUID(0x09576e92, 0x6d3f, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) #define EFI_SYSTEM_RESOURCE_TABLE_GUID EFI_GUID(0xb122a263, 0x3661, 0x4f68, 0x99, 0x29, 0x78, 0xf8, 0xb0, 0xd6, 0x21, 0x80) @@ -1287,8 +1285,6 @@ struct linux_efi_memreserve { void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); -char *efi_systab_show_arch(char *str); - /* * The LINUX_EFI_MOK_VARIABLE_TABLE_GUID config table can be provided * to the kernel by an EFI boot loader. The table contains a packed diff --git a/include/linux/eisa.h b/include/linux/eisa.h index f98200cae637..21a2ecc1e538 100644 --- a/include/linux/eisa.h +++ b/include/linux/eisa.h @@ -28,6 +28,9 @@ #define EISA_CONFIG_ENABLED 1 #define EISA_CONFIG_FORCED 2 +/* Chosen to hold the longest string in eisa.ids. */ +#define EISA_DEVICE_INFO_NAME_SIZE 74 + /* There is not much we can say about an EISA device, apart from * signature, slot number, and base address. dma_mask is set by * default to parent device mask..*/ @@ -41,7 +44,7 @@ struct eisa_device { u64 dma_mask; struct device dev; /* generic device */ #ifdef CONFIG_EISA_NAMES - char pretty_name[50]; + char pretty_name[EISA_DEVICE_INFO_NAME_SIZE]; #endif }; diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 752e0b297582..7fa1eb3cc823 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -167,18 +167,20 @@ struct em_data_callback { struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table); + struct em_perf_table *new_table); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts); + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd); -void em_table_free(struct em_perf_table __rcu *table); +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd); +void em_table_free(struct em_perf_table *table); int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int nr_states); int em_dev_update_chip_binning(struct device *dev); int em_update_performance_limits(struct em_perf_domain *pd, unsigned long freq_min_khz, unsigned long freq_max_khz); +void em_adjust_cpu_capacity(unsigned int cpu); +void em_rebuild_sched_domains(void); /** * em_pd_get_efficient_state() - Get an efficient performance state from the EM @@ -239,9 +241,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, struct em_perf_state *ps; int i; -#ifdef CONFIG_SCHED_DEBUG WARN_ONCE(!rcu_read_lock_held(), "EM: rcu read lock needed\n"); -#endif if (!sum_util) return 0; @@ -345,8 +345,8 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, - struct em_data_callback *cb, cpumask_t *span, - bool microwatts) + const struct em_data_callback *cb, + const cpumask_t *cpus, bool microwatts) { return -EINVAL; } @@ -372,14 +372,14 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd) return 0; } static inline -struct em_perf_table __rcu *em_table_alloc(struct em_perf_domain *pd) +struct em_perf_table *em_table_alloc(struct em_perf_domain *pd) { return NULL; } -static inline void em_table_free(struct em_perf_table __rcu *table) {} +static inline void em_table_free(struct em_perf_table *table) {} static inline int em_dev_update_perf_domain(struct device *dev, - struct em_perf_table __rcu *new_table) + struct em_perf_table *new_table) { return -EINVAL; } @@ -404,6 +404,8 @@ int em_update_performance_limits(struct em_perf_domain *pd, { return -EINVAL; } +static inline void em_adjust_cpu_capacity(unsigned int cpu) {} +static inline void em_rebuild_sched_domains(void) {} #endif #endif diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fc61d0205c97..f94f3fdf15fc 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -14,6 +14,7 @@ #include <linux/kmsan.h> #include <asm/entry-common.h> +#include <asm/syscall.h> /* * Define dummy _TIF work flags if not defined by the architecture or for @@ -367,6 +368,15 @@ static __always_inline void exit_to_user_mode(void) } /** + * syscall_exit_work - Handle work before returning to user mode + * @regs: Pointer to current pt_regs + * @work: Current thread syscall work + * + * Do one-time syscall specific work. + */ +void syscall_exit_work(struct pt_regs *regs, unsigned long work); + +/** * syscall_exit_to_user_mode_work - Handle work before returning to user mode * @regs: Pointer to currents pt_regs * @@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void) * make the final state transitions. Interrupts must stay disabled between * return from this function and the invocation of exit_to_user_mode(). */ -void syscall_exit_to_user_mode_work(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_syscall(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); + local_irq_disable_exit_to_user(); + exit_to_user_mode_prepare(regs); +} /** * syscall_exit_to_user_mode - Handle work before returning to user mode @@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * exit_to_user_mode(). This function is preferred unless there is a * compelling architectural reason to use the separate functions. */ -void syscall_exit_to_user_mode(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + syscall_exit_to_user_mode_work(regs); + instrumentation_end(); + exit_to_user_mode(); +} /** * irqentry_enter_from_user_mode - Establish state before invoking the irq handler diff --git a/include/linux/err.h b/include/linux/err.h index a4dacd745fcf..1d60aa86db53 100644 --- a/include/linux/err.h +++ b/include/linux/err.h @@ -44,6 +44,9 @@ static inline void * __must_check ERR_PTR(long error) /* Return the pointer in the percpu address space. */ #define ERR_PTR_PCPU(error) ((void __percpu *)(unsigned long)ERR_PTR(error)) +/* Cast an error pointer to __iomem. */ +#define IOMEM_ERR_PTR(error) (__force void __iomem *)ERR_PTR(error) + /** * PTR_ERR - Extract the error code from an error pointer. * @ptr: An error pointer. diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index ecf203f01034..9a1eacf35d37 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -81,7 +81,7 @@ static const u8 eth_ipv6_mcast_addr_base[ETH_ALEN] __aligned(2) = * is_link_local_ether_addr - Determine if given Ethernet address is link-local * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per + * Return: true if address is link local reserved addr (01:80:c2:00:00:0X) per * IEEE 802.1Q 8.6.3 Frame filtering. * * Please note: addr must be aligned to u16. @@ -104,7 +104,7 @@ static inline bool is_link_local_ether_addr(const u8 *addr) * is_zero_ether_addr - Determine if give Ethernet address is all zeros. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is all zeroes. + * Return: true if the address is all zeroes. * * Please note: addr must be aligned to u16. */ @@ -123,7 +123,7 @@ static inline bool is_zero_ether_addr(const u8 *addr) * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a multicast address. + * Return: true if the address is a multicast address. * By definition the broadcast address is also a multicast address. */ static inline bool is_multicast_ether_addr(const u8 *addr) @@ -157,7 +157,7 @@ static inline bool is_multicast_ether_addr_64bits(const u8 *addr) * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802). * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a local address. + * Return: true if the address is a local address. */ static inline bool is_local_ether_addr(const u8 *addr) { @@ -168,7 +168,7 @@ static inline bool is_local_ether_addr(const u8 *addr) * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is the broadcast address. + * Return: true if the address is the broadcast address. * * Please note: addr must be aligned to u16. */ @@ -183,7 +183,7 @@ static inline bool is_broadcast_ether_addr(const u8 *addr) * is_unicast_ether_addr - Determine if the Ethernet address is unicast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a unicast address. + * Return: true if the address is a unicast address. */ static inline bool is_unicast_ether_addr(const u8 *addr) { @@ -197,7 +197,7 @@ static inline bool is_unicast_ether_addr(const u8 *addr) * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not * a multicast address, and is not FF:FF:FF:FF:FF:FF. * - * Return true if the address is valid. + * Return: true if the address is valid. * * Please note: addr must be aligned to u16. */ @@ -214,7 +214,7 @@ static inline bool is_valid_ether_addr(const u8 *addr) * * Check that the value from the Ethertype/length field is a valid Ethertype. * - * Return true if the valid is an 802.3 supported Ethertype. + * Return: true if the valid is an 802.3 supported Ethertype. */ static inline bool eth_proto_is_802_3(__be16 proto) { @@ -458,7 +458,7 @@ static inline bool ether_addr_is_ip_mcast(const u8 *addr) * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return a u64 value of the address + * Return: a u64 value of the address */ static inline u64 ether_addr_to_u64(const u8 *addr) { diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b8b935b52603..5e0dd333ad1f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -17,9 +17,14 @@ #include <linux/compat.h> #include <linux/if_ether.h> #include <linux/netlink.h> +#include <linux/timer_types.h> #include <uapi/linux/ethtool.h> +#include <uapi/linux/ethtool_netlink_generated.h> #include <uapi/linux/net_tstamp.h> +#define ETHTOOL_MM_MAX_VERIFY_TIME_MS 128 +#define ETHTOOL_MM_MAX_VERIFY_RETRIES 3 + struct compat_ethtool_rx_flow_spec { u32 flow_type; union ethtool_flow_union h_u; @@ -78,6 +83,9 @@ enum { * @cqe_size: Size of TX/RX completion queue event * @tx_push_buf_len: Size of TX push buffer * @tx_push_buf_max_len: Maximum allowed size of TX push buffer + * @hds_thresh: Packet size threshold for header data split (HDS) + * @hds_thresh_max: Maximum supported setting for @hds_threshold + * */ struct kernel_ethtool_ringparam { u32 rx_buf_len; @@ -87,6 +95,8 @@ struct kernel_ethtool_ringparam { u32 cqe_size; u32 tx_push_buf_len; u32 tx_push_buf_max_len; + u32 hds_thresh; + u32 hds_thresh_max; }; /** @@ -97,6 +107,7 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split + * @ETHTOOL_RING_USE_HDS_THRS: capture for setting header-data-split-thresh */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), @@ -105,6 +116,7 @@ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_PUSH = BIT(3), ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5), + ETHTOOL_RING_USE_HDS_THRS = BIT(6), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -203,6 +215,14 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); +struct link_mode_info { + int speed; + u8 lanes; + u8 duplex; +}; + +extern const struct link_mode_info link_mode_params[]; + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -257,7 +277,7 @@ struct ethtool_link_ksettings { * @mode : one of the ETHTOOL_LINK_MODE_*_BIT * (not atomic, no bound checking) * - * Returns true/false. + * Returns: true/false. */ #define ethtool_link_ksettings_test_link_mode(ptr, name, mode) \ test_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) @@ -412,6 +432,29 @@ struct ethtool_eth_phy_stats { ); }; +/** + * struct ethtool_phy_stats - PHY-level statistics counters + * @rx_packets: Total successfully received frames + * @rx_bytes: Total successfully received bytes + * @rx_errors: Total received frames with errors (e.g., CRC errors) + * @tx_packets: Total successfully transmitted frames + * @tx_bytes: Total successfully transmitted bytes + * @tx_errors: Total transmitted frames with errors + * + * This structure provides a standardized interface for reporting + * PHY-level statistics counters. It is designed to expose statistics + * commonly provided by PHYs but not explicitly defined in the IEEE + * 802.3 standard. + */ +struct ethtool_phy_stats { + u64 rx_packets; + u64 rx_bytes; + u64 rx_errors; + u64 tx_packets; + u64 tx_bytes; + u64 tx_errors; +}; + /* Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*), not otherwise exposed * via a more targeted API. */ @@ -529,6 +572,12 @@ struct ethtool_rmon_stats { /** * struct ethtool_ts_stats - HW timestamping statistics * @pkts: Number of packets successfully timestamped by the hardware. + * @onestep_pkts_unconfirmed: Number of PTP packets with one-step TX + * timestamping that were sent, but for which the + * device offers no confirmation whether they made + * it onto the wire and the timestamp was inserted + * in the originTimestamp or correctionField, or + * not. * @lost: Number of hardware timestamping requests where the timestamping * information from the hardware never arrived for submission with * the skb. @@ -541,6 +590,7 @@ struct ethtool_rmon_stats { struct ethtool_ts_stats { struct_group(tx_stats, u64 pkts; + u64 onestep_pkts_unconfirmed; u64 lost; u64 err; ); @@ -673,6 +723,75 @@ struct ethtool_mm_stats { u64 MACMergeHoldCount; }; +enum ethtool_mmsv_event { + ETHTOOL_MMSV_LP_SENT_VERIFY_MPACKET, + ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET, + ETHTOOL_MMSV_LP_SENT_RESPONSE_MPACKET, +}; + +/* MAC Merge verification mPacket type */ +enum ethtool_mpacket { + ETHTOOL_MPACKET_VERIFY, + ETHTOOL_MPACKET_RESPONSE, +}; + +struct ethtool_mmsv; + +/** + * struct ethtool_mmsv_ops - Operations for MAC Merge Software Verification + * @configure_tx: Driver callback for the event where the preemptible TX + * becomes active or inactive. Preemptible traffic + * classes must be committed to hardware only while + * preemptible TX is active. + * @configure_pmac: Driver callback for the event where the pMAC state + * changes as result of an administrative setting + * (ethtool) or a call to ethtool_mmsv_link_state_handle(). + * @send_mpacket: Driver-provided method for sending a Verify or a Response + * mPacket. + */ +struct ethtool_mmsv_ops { + void (*configure_tx)(struct ethtool_mmsv *mmsv, bool tx_active); + void (*configure_pmac)(struct ethtool_mmsv *mmsv, bool pmac_enabled); + void (*send_mpacket)(struct ethtool_mmsv *mmsv, enum ethtool_mpacket mpacket); +}; + +/** + * struct ethtool_mmsv - MAC Merge Software Verification + * @ops: operations for MAC Merge Software Verification + * @dev: pointer to net_device structure + * @lock: serialize access to MAC Merge state between + * ethtool requests and link state updates. + * @status: current verification FSM state + * @verify_timer: timer for verification in local TX direction + * @verify_enabled: indicates if verification is enabled + * @verify_retries: number of retries for verification + * @pmac_enabled: indicates if the preemptible MAC is enabled + * @verify_time: time for verification in milliseconds + * @tx_enabled: indicates if transmission is enabled + */ +struct ethtool_mmsv { + const struct ethtool_mmsv_ops *ops; + struct net_device *dev; + spinlock_t lock; + enum ethtool_mm_verify_status status; + struct timer_list verify_timer; + bool verify_enabled; + int verify_retries; + bool pmac_enabled; + u32 verify_time; + bool tx_enabled; +}; + +void ethtool_mmsv_stop(struct ethtool_mmsv *mmsv); +void ethtool_mmsv_link_state_handle(struct ethtool_mmsv *mmsv, bool up); +void ethtool_mmsv_event_handle(struct ethtool_mmsv *mmsv, + enum ethtool_mmsv_event event); +void ethtool_mmsv_get_mm(struct ethtool_mmsv *mmsv, + struct ethtool_mm_state *state); +void ethtool_mmsv_set_mm(struct ethtool_mmsv *mmsv, struct ethtool_mm_cfg *cfg); +void ethtool_mmsv_init(struct ethtool_mmsv *mmsv, struct net_device *dev, + const struct ethtool_mmsv_ops *ops); + /** * struct ethtool_rxfh_param - RXFH (RSS) parameters * @hfunc: Defines the current RSS hash function used by HW (or to be set to). @@ -711,6 +830,9 @@ struct ethtool_rxfh_param { * @cmd: command number = %ETHTOOL_GET_TS_INFO * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none + * @phc_qualifier: qualifier of the associated PHC + * @phc_source: source device of the associated PHC + * @phc_phyindex: index of PHY device source of the associated PHC * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values */ @@ -718,19 +840,21 @@ struct kernel_ethtool_ts_info { u32 cmd; u32 so_timestamping; int phc_index; + enum hwtstamp_provider_qualifier phc_qualifier; + enum hwtstamp_source phc_source; + int phc_phyindex; enum hwtstamp_tx_types tx_types; enum hwtstamp_rx_filters rx_filters; }; /** * struct ethtool_ops - optional netdev operations + * @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*. * @cap_link_lanes_supported: indicates if the driver supports lanes * parameter. * @cap_rss_ctx_supported: indicates if the driver supports RSS * contexts via legacy API, drivers implementing @create_rxfh_context * do not have to set this bit. - * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor - * RSS. * @rxfh_per_ctx_key: device supports setting different RSS key for each * additional context. Netlink API should report hfunc, key, and input_xfrm * for every context, not just context 0. @@ -749,6 +873,7 @@ struct kernel_ethtool_ts_info { * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. + * @supported_hwtstamp_qualifiers: bitfield of supported hwtstamp qualifier. * @get_drvinfo: Report driver/device information. Modern drivers no * longer have to implement this callback. Most fields are * correctly filled in by the core using system information, or @@ -879,10 +1004,11 @@ struct kernel_ethtool_ts_info { * @get_ts_info: Get the time stamping and PTP hardware clock capabilities. * It may be called with RCU, or rtnl or reference on the device. * Drivers supporting transmit time stamps in software should set this to - * ethtool_op_get_ts_info(). Drivers must not zero statistics which they - * don't report. The stats structure is initialized to ETHTOOL_STAT_NOT_SET - * indicating driver does not report statistics. - * @get_ts_stats: Query the device hardware timestamping statistics. + * ethtool_op_get_ts_info(). + * @get_ts_stats: Query the device hardware timestamping statistics. Drivers + * must not zero statistics which they don't report. The stats structure + * is initialized to ETHTOOL_STAT_NOT_SET indicating driver does not + * report statistics. * @get_module_info: Get the size and type of the eeprom contained within * a plug-in module. * @get_module_eeprom: Get the eeprom information from the plug-in module @@ -955,9 +1081,9 @@ struct kernel_ethtool_ts_info { * of the generic netdev features interface. */ struct ethtool_ops { + u32 supported_input_xfrm:8; u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; - u32 cap_rss_sym_xor_supported:1; u32 rxfh_per_ctx_key:1; u32 cap_rss_rxnfc_adds:1; u32 rxfh_indir_space; @@ -966,6 +1092,7 @@ struct ethtool_ops { u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; + u32 supported_hwtstamp_qualifiers; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); @@ -1199,7 +1326,7 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, * @dev: pointer to net_device structure * @vclock_index: pointer to pointer of vclock index * - * Return number of phc vclocks + * Return: number of phc vclocks */ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); @@ -1253,7 +1380,7 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. * @dev: pointer to net_device structure * @info: buffer to hold the result - * Returns zero on success, non-zero otherwise. + * Returns: zero on success, non-zero otherwise. */ int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info); @@ -1281,6 +1408,17 @@ extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...); */ extern void ethtool_puts(u8 **data, const char *str); +/** + * ethtool_cpy - Write possibly-not-NUL-terminated string to ethtool string data + * @data: Pointer to a pointer to the start of string to write into + * @str: NUL-byte padded char array of size ETH_GSTRING_LEN to copy from + */ +#define ethtool_cpy(data, str) do { \ + BUILD_BUG_ON(sizeof(str) != ETH_GSTRING_LEN); \ + memcpy(*(data), str, ETH_GSTRING_LEN); \ + *(data) += ETH_GSTRING_LEN; \ +} while (0) + /* Link mode to forced speed capabilities maps */ struct ethtool_forced_speed_map { u32 speed; @@ -1299,24 +1437,4 @@ struct ethtool_forced_speed_map { void ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); - -/* C33 PSE extended state and substate. */ -struct ethtool_c33_pse_ext_state_info { - enum ethtool_c33_pse_ext_state c33_pse_ext_state; - union { - enum ethtool_c33_pse_ext_substate_error_condition error_condition; - enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable; - enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted; - enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim; - enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected; - enum ethtool_c33_pse_ext_substate_power_not_available power_not_available; - enum ethtool_c33_pse_ext_substate_short_detected short_detected; - u32 __c33_pse_ext_substate; - }; -}; - -struct ethtool_c33_pse_pw_limit_range { - u32 min; - u32 max; -}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 0c0d00fcd131..ccb478eb174b 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t /* Used to release the epoll bits inside the "struct file" */ void eventpoll_release_file(struct file *file); +/* Copy ready events to userspace */ +int epoll_sendevents(struct file *file, struct epoll_event __user *events, + int maxevents); + /* * This is called from inside fs/file_table.c:__fput() to unlink files * from the eventpoll interface. We need to have this facility to cleanup diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 64130ae19690..3be35680a54f 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/moduleloader.h> +#include <linux/cleanup.h> #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) @@ -65,6 +66,37 @@ enum execmem_range_flags { * Architectures that use EXECMEM_ROX_CACHE must implement this. */ void execmem_fill_trapping_insns(void *ptr, size_t size, bool writable); + +/** + * execmem_make_temp_rw - temporarily remap region with read-write + * permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Remaps a part of the cached large page in the ROX cache in the range + * [@ptr, @ptr + @size) as writable and not executable. The caller must + * have exclusive ownership of this range and ensure nothing will try to + * execute code in this range. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_make_temp_rw(void *ptr, size_t size); + +/** + * execmem_restore_rox - restore read-only-execute permissions + * @ptr: address of the region to remap + * @size: size of the region to remap + * + * Restores read-only-execute permissions on a range [@ptr, @ptr + @size) + * after it was temporarily remapped as writable. Relies on architecture + * implementation of set_memory_rox() to restore mapping using large pages. + * + * Return: 0 on success or negative error code on failure. + */ +int execmem_restore_rox(void *ptr, size_t size); +#else +static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } +static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } #endif /** @@ -139,6 +171,8 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +DEFINE_FREE(execmem, void *, if (_T) execmem_free(_T)); + #ifdef CONFIG_MMU /** * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory diff --git a/include/linux/export.h b/include/linux/export.h index 2633df4d31e6..f35d03b4113b 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -24,11 +24,17 @@ .long sym #endif -#define ___EXPORT_SYMBOL(sym, license, ns) \ +/* + * LLVM integrated assembler cam merge adjacent string literals (like + * C and GNU-as) passed to '.ascii', but not to '.asciz' and chokes on: + * + * .asciz "MODULE_" "kvm" ; + */ +#define ___EXPORT_SYMBOL(sym, license, ns...) \ .section ".export_symbol","a" ASM_NL \ __export_symbol_##sym: ASM_NL \ .asciz license ASM_NL \ - .asciz ns ASM_NL \ + .ascii ns "\0" ASM_NL \ __EXPORT_SYMBOL_REF(sym) ASM_NL \ .previous @@ -52,9 +58,24 @@ #else +#ifdef CONFIG_GENDWARFKSYMS +/* + * With CONFIG_GENDWARFKSYMS, ensure the compiler emits debugging + * information for all exported symbols, including those defined in + * different TUs, by adding a __gendwarfksyms_ptr_<symbol> pointer + * that's discarded during the final link. + */ +#define __GENDWARFKSYMS_EXPORT(sym) \ + static typeof(sym) *__gendwarfksyms_ptr_##sym __used \ + __section(".discard.gendwarfksyms") = &sym; +#else +#define __GENDWARFKSYMS_EXPORT(sym) +#endif + #define __EXPORT_SYMBOL(sym, license, ns) \ extern typeof(sym) sym; \ __ADDRESSABLE(sym) \ + __GENDWARFKSYMS_EXPORT(sym) \ asm(__stringify(___EXPORT_SYMBOL(sym, license, ns))) #endif @@ -70,4 +91,6 @@ #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "GPL", ns) +#define EXPORT_SYMBOL_GPL_FOR_MODULES(sym, mods) __EXPORT_SYMBOL(sym, "GPL", "module:" mods) + #endif /* _LINUX_EXPORT_H */ diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 4cc8801e50e3..25c4a5afbd44 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -3,6 +3,7 @@ #define LINUX_EXPORTFS_H 1 #include <linux/types.h> +#include <linux/path.h> struct dentry; struct iattr; @@ -156,6 +157,17 @@ struct fid { }; }; +enum handle_to_path_flags { + HANDLE_CHECK_PERMS = (1 << 0), + HANDLE_CHECK_SUBTREE = (1 << 1), +}; + +struct handle_to_path_ctx { + struct path root; + enum handle_to_path_flags flags; + unsigned int fh_flags; +}; + #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ #define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ @@ -225,6 +237,12 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * permission: + * Allow filesystems to specify a custom permission function. + * + * open: + * Allow filesystems to specify a custom open function. + * * commit_metadata: * @commit_metadata should commit metadata changes to stable storage. * @@ -251,6 +269,8 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); + struct file * (*open)(struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ @@ -259,10 +279,22 @@ struct export_operations { atomic attribute updates */ #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ -#define EXPORT_OP_ASYNC_LOCK (0x40) /* fs can do async lock request */ +#define EXPORT_OP_NOLOCKS (0x40) /* no file locking support */ unsigned long flags; }; +/** + * exportfs_cannot_lock() - check if export implements file locking + * @export_ops: the nfs export operations to check + * + * Returns true if the export does not support file locking. + */ +static inline bool +exportfs_cannot_lock(const struct export_operations *export_ops) +{ + return export_ops->flags & EXPORT_OP_NOLOCKS; +} + extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags); @@ -282,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { + if (!nop) + return false; + /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -290,6 +325,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, return exportfs_can_encode_fid(nop); /* + * If a connectable file handle was requested, we need to make sure that + * filesystem can also decode connected file handles. + */ + if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent) + return false; + + /* * If a decodeable file handle was requested, we need to make sure that * filesystem can also decode file handles. */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c24f8bc01045..5206d63b3386 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -78,6 +78,7 @@ enum stop_cp_reason { STOP_CP_REASON_UPDATE_INODE, STOP_CP_REASON_FLUSH_FAIL, STOP_CP_REASON_NO_SEGMENT, + STOP_CP_REASON_CORRUPTED_FREE_BITMAP, STOP_CP_REASON_MAX, }; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 89ff45bd6f01..879cff5eccd4 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ #define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) -#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) +#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT) /* * fanotify_init() flags that require CAP_SYS_ADMIN. @@ -47,7 +47,7 @@ * so one of the flags for reporting file handles is required. */ #define FANOTIFY_USER_INIT_FLAGS (FAN_CLASS_NOTIF | \ - FANOTIFY_FID_BITS | \ + FANOTIFY_FID_BITS | FAN_REPORT_MNT | \ FAN_CLOEXEC | FAN_NONBLOCK) #define FANOTIFY_INIT_FLAGS (FANOTIFY_ADMIN_INIT_FLAGS | \ @@ -58,7 +58,7 @@ #define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV) #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ - FAN_MARK_FILESYSTEM) + FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS) #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ FAN_MARK_FLUSH) @@ -89,6 +89,16 @@ #define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ FAN_RENAME) +/* Content events can be used to inspect file content */ +#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \ + FAN_ACCESS_PERM) +/* Pre-content events can be used to fill file content */ +#define FANOTIFY_PRE_CONTENT_EVENTS (FAN_PRE_ACCESS) + +/* Events that require a permission response from user */ +#define FANOTIFY_PERM_EVENTS (FANOTIFY_CONTENT_PERM_EVENTS | \ + FANOTIFY_PRE_CONTENT_EVENTS) + /* Events that can be reported with event->fd */ #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) @@ -99,14 +109,13 @@ /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ #define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) +#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH) + /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ FANOTIFY_INODE_EVENTS | \ - FANOTIFY_ERROR_EVENTS) - -/* Events that require a permission response from user */ -#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ - FAN_OPEN_EXEC_PERM) + FANOTIFY_ERROR_EVENTS | \ + FANOTIFY_MOUNT_EVENTS) /* Extra flags that may be reported with event or control handling of events */ #define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) @@ -126,7 +135,9 @@ /* These masks check for invalid bits in permission responses. */ #define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY) #define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO) -#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS) +#define FANOTIFY_RESPONSE_VALID_MASK \ + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \ + (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT)) /* Do not use these old uapi constants internally */ #undef FAN_ALL_CLASS_BITS diff --git a/include/linux/fb.h b/include/linux/fb.h index 5ba187e08cf7..05cc251035da 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -129,18 +129,12 @@ struct fb_cursor_user { * Register/unregister for framebuffer events */ -/* The resolution of the passed in fb_info about to change */ -#define FB_EVENT_MODE_CHANGE 0x01 - #ifdef CONFIG_GUMSTIX_AM200EPD /* only used by mach-pxa/am200epd.c */ #define FB_EVENT_FB_REGISTERED 0x05 #define FB_EVENT_FB_UNREGISTERED 0x06 #endif -/* A display blank is requested */ -#define FB_EVENT_BLANK 0x09 - struct fb_event { struct fb_info *info; void *data; @@ -225,6 +219,7 @@ struct fb_deferred_io { int open_count; /* number of opened files; protected by fb_info lock */ struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ + struct address_space *mapping; /* page cache object for fb device */ /* callback */ struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); @@ -471,6 +466,8 @@ struct fb_info { struct list_head modelist; /* mode list */ struct fb_videomode *mode; /* current mode */ + int blank; /* current blanking; see FB_BLANK_ constants */ + #if IS_ENABLED(CONFIG_FB_BACKLIGHT) /* assigned backlight device */ /* set before framebuffer registration, @@ -755,11 +752,15 @@ extern void fb_bl_default_curve(struct fb_info *fb_info, u8 off, u8 min, u8 max) #if IS_ENABLED(CONFIG_FB_BACKLIGHT) struct backlight_device *fb_bl_device(struct fb_info *info); +void fb_bl_notify_blank(struct fb_info *info, int old_blank); #else static inline struct backlight_device *fb_bl_device(struct fb_info *info) { return NULL; } + +static inline void fb_bl_notify_blank(struct fb_info *info, int old_blank) +{ } #endif static inline struct lcd_device *fb_lcd_device(struct fb_info *info) diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index c50882f19235..966092ffa89a 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -5,12 +5,18 @@ #include <uapi/linux/fiemap.h> #include <linux/fs.h> +/** + * struct fiemap_extent_info - fiemap request to a filesystem + * @fi_flags: Flags as passed from user + * @fi_extents_mapped: Number of mapped extents + * @fi_extents_max: Size of fiemap_extent array + * @fi_extents_start: Start of fiemap_extent array + */ struct fiemap_extent_info { - unsigned int fi_flags; /* Flags as passed from user */ - unsigned int fi_extents_mapped; /* Number of mapped extents */ - unsigned int fi_extents_max; /* Size of fiemap_extent array */ - struct fiemap_extent __user *fi_extents_start; /* Start of - fiemap_extent array */ + unsigned int fi_flags; + unsigned int fi_extents_mapped; + unsigned int fi_extents_max; + struct fiemap_extent __user *fi_extents_start; }; int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/include/linux/file.h b/include/linux/file.h index 302f11355b10..af1768d934a0 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -59,7 +59,7 @@ static inline struct fd CLONED_FD(struct file *f) static inline void fdput(struct fd fd) { - if (fd.word & FDPUT_FPUT) + if (unlikely(fd.word & FDPUT_FPUT)) fput(fd_file(fd)); } diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 9b3a8d9b17ab..31551e4cb8f3 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -161,6 +161,33 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) } /** + * file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF + * @ref: Pointer to the reference count + * + * Semantically it is equivalent to calling file_ref_put(), but it trades lower + * performance in face of other CPUs also modifying the refcount for higher + * performance when this happens to be the last reference. + * + * For the last reference file_ref_put() issues 2 atomics. One to drop the + * reference and another to transition it to FILE_REF_DEAD. This routine does + * the work in one step, but in order to do it has to pre-read the variable which + * decreases scalability. + * + * Use with close() et al, stick to file_ref_put() by default. + */ +static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) +{ + long old; + + old = atomic_long_read(&ref->refcnt); + if (likely(old == FILE_REF_ONEREF)) { + if (likely(atomic_long_try_cmpxchg(&ref->refcnt, &old, FILE_REF_DEAD))) + return true; + } + return file_ref_put(ref); +} + +/** * file_ref_read - Read the number of file references * @ref: Pointer to the reference count * @@ -174,4 +201,18 @@ static inline unsigned long file_ref_read(file_ref_t *ref) return c >= FILE_REF_RELEASED ? 0 : c + 1; } +/* + * __file_ref_read_raw - Return the value stored in ref->refcnt + * @ref: Pointer to the reference count + * + * Return: The raw value found in the counter + * + * A hack for file_needs_f_pos_lock(), you probably want to use + * file_ref_read() instead. + */ +static inline unsigned long __file_ref_read_raw(file_ref_t *ref) +{ + return atomic_long_read(&ref->refcnt); +} + #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 0477254bc2d3..f5cf4d35d83e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -364,6 +364,8 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + * BPF_LOAD_ACQ dst_reg = smp_load_acquire(src_reg + off16) + * BPF_STORE_REL smp_store_release(dst_reg + off16, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ @@ -469,6 +471,16 @@ static inline bool insn_is_cast_user(const struct bpf_insn *insn) .off = 0, \ .imm = BPF_CALL_IMM(FUNC) }) +/* Kfunc call */ + +#define BPF_CALL_KFUNC(OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_CALL, \ + .dst_reg = 0, \ + .src_reg = BPF_PSEUDO_KFUNC_CALL, \ + .off = OFF, \ + .imm = IMM }) + /* Raw code statement block */ #define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ @@ -659,6 +671,11 @@ struct bpf_prog_stats { struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); +struct bpf_timed_may_goto { + u64 count; + u64 timestamp; +}; + struct sk_filter { refcount_t refcnt; struct rcu_head rcu; @@ -1120,8 +1137,11 @@ bool bpf_jit_supports_ptr_xchg(void); bool bpf_jit_supports_arena(void); bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena); bool bpf_jit_supports_private_stack(void); +bool bpf_jit_supports_timed_may_goto(void); u64 bpf_arch_uaddress_limit(void); void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie); +u64 arch_bpf_timed_may_goto(void); +u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *); bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); static inline bool bpf_dump_raw_ok(const struct cred *cred) @@ -1179,17 +1199,18 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd, * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct xdp_buff *xdp, struct bpf_prog *prog); + struct xdp_buff *xdp, const struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *prog); + const struct bpf_prog *prog); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, - struct bpf_prog *prog); + const struct bpf_prog *prog); void xdp_do_flush(void); -void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); +void bpf_warn_invalid_xdp_action(const struct net_device *dev, + const struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, @@ -1507,6 +1528,7 @@ struct bpf_sock_ops_kern { void *skb_data_end; u8 op; u8 is_fullsock; + u8 is_locked_tcp_sock; u8 remaining_opt_len; u64 temp; /* temp and everything after is not * initialized to 0 before calling diff --git a/include/linux/find.h b/include/linux/find.h index 68685714bc18..5a2c267ea7f9 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -29,6 +29,8 @@ unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsign unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); +unsigned long _find_first_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size); unsigned long _find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); @@ -348,6 +350,29 @@ unsigned long find_first_and_bit(const unsigned long *addr1, #endif /** + * find_first_andnot_bit - find the first bit set in 1st memory region and unset in 2nd + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the first set bit + * If no bits are set, returns >= @size. + */ +static __always_inline +unsigned long find_first_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_andnot_bit(addr1, addr2, size); +} + +/** * find_first_and_and_bit - find the first set bit in 3 memory regions * @addr1: The first address to base the search on * @addr2: The second address to base the search on diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h new file mode 100644 index 000000000000..ecd821ed8064 --- /dev/null +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Support utilities for cs_dsp testing. + * + * Copyright (C) 2024 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#include <linux/regmap.h> +#include <linux/firmware/cirrus/wmfw.h> + +struct kunit; +struct cs_dsp_test; +struct cs_dsp_test_local; + +/** + * struct cs_dsp_test - base class for test utilities + * + * @test: Pointer to struct kunit instance. + * @dsp: Pointer to struct cs_dsp instance. + * @local: Private data for each test suite. + */ +struct cs_dsp_test { + struct kunit *test; + struct cs_dsp *dsp; + + struct cs_dsp_test_local *local; + + /* Following members are private */ + bool saw_bus_write; +}; + +/** + * struct cs_dsp_mock_alg_def - Info for creating a mock algorithm entry. + * + * @id Algorithm ID. + * @ver; Algorithm version. + * @xm_base_words XM base address in DSP words. + * @xm_size_words XM size in DSP words. + * @ym_base_words YM base address in DSP words. + * @ym_size_words YM size in DSP words. + * @zm_base_words ZM base address in DSP words. + * @zm_size_words ZM size in DSP words. + */ +struct cs_dsp_mock_alg_def { + unsigned int id; + unsigned int ver; + unsigned int xm_base_words; + unsigned int xm_size_words; + unsigned int ym_base_words; + unsigned int ym_size_words; + unsigned int zm_base_words; + unsigned int zm_size_words; +}; + +struct cs_dsp_mock_coeff_def { + const char *shortname; + const char *fullname; + const char *description; + u16 type; + u16 flags; + u16 mem_type; + unsigned int offset_dsp_words; + unsigned int length_bytes; +}; + +/** + * struct cs_dsp_mock_xm_header - XM header builder + * + * @test_priv: Pointer to the struct cs_dsp_test. + * @blob_data: Pointer to the created blob data. + * @blob_size_bytes: Size of the data at blob_data. + */ +struct cs_dsp_mock_xm_header { + struct cs_dsp_test *test_priv; + void *blob_data; + size_t blob_size_bytes; +}; + +struct cs_dsp_mock_wmfw_builder; +struct cs_dsp_mock_bin_builder; + +extern const unsigned int cs_dsp_mock_adsp2_32bit_sysbase; +extern const unsigned int cs_dsp_mock_adsp2_16bit_sysbase; +extern const unsigned int cs_dsp_mock_halo_core_base; +extern const unsigned int cs_dsp_mock_halo_sysinfo_base; + +extern const struct cs_dsp_region cs_dsp_mock_halo_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_halo_dsp1_region_sizes[]; +extern const struct cs_dsp_region cs_dsp_mock_adsp2_32bit_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_adsp2_32bit_dsp1_region_sizes[]; +extern const struct cs_dsp_region cs_dsp_mock_adsp2_16bit_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_adsp2_16bit_dsp1_region_sizes[]; +int cs_dsp_mock_count_regions(const unsigned int *region_sizes); +unsigned int cs_dsp_mock_size_of_region(const struct cs_dsp *dsp, int mem_type); +unsigned int cs_dsp_mock_base_addr_for_mem(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_addr_inc_per_unpacked_word(struct cs_dsp_test *priv); +unsigned int cs_dsp_mock_reg_block_length_bytes(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_block_length_registers(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_block_length_dsp_words(struct cs_dsp_test *priv, int mem_type); +bool cs_dsp_mock_has_zm(struct cs_dsp_test *priv); +int cs_dsp_mock_packed_to_unpacked_mem_type(int packed_mem_type); +unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_words); +unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, + unsigned int alg_id, + int mem_type); +unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header); +void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv); +int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header); +struct cs_dsp_mock_xm_header *cs_dsp_create_mock_xm_header(struct cs_dsp_test *priv, + const struct cs_dsp_mock_alg_def *algs, + size_t num_algs); + +int cs_dsp_mock_regmap_init(struct cs_dsp_test *priv); +void cs_dsp_mock_regmap_drop_range(struct cs_dsp_test *priv, + unsigned int first_reg, unsigned int last_reg); +void cs_dsp_mock_regmap_drop_regs(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_regs); +void cs_dsp_mock_regmap_drop_bytes(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_bytes); +void cs_dsp_mock_regmap_drop_system_regs(struct cs_dsp_test *priv); +bool cs_dsp_mock_regmap_is_dirty(struct cs_dsp_test *priv, bool drop_system_regs); + +struct cs_dsp_mock_bin_builder *cs_dsp_mock_bin_init(struct cs_dsp_test *priv, + int format_version, + unsigned int fw_version); +void cs_dsp_mock_bin_add_raw_block(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int type, unsigned int offset, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_bin_add_info(struct cs_dsp_mock_bin_builder *builder, + const char *info); +void cs_dsp_mock_bin_add_name(struct cs_dsp_mock_bin_builder *builder, + const char *name); +void cs_dsp_mock_bin_add_patch(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int mem_region, unsigned int reg_addr_offset, + const void *payload_data, size_t payload_len_bytes); +struct firmware *cs_dsp_mock_bin_get_firmware(struct cs_dsp_mock_bin_builder *builder); + +struct cs_dsp_mock_wmfw_builder *cs_dsp_mock_wmfw_init(struct cs_dsp_test *priv, + int format_version); +void cs_dsp_mock_wmfw_add_raw_block(struct cs_dsp_mock_wmfw_builder *builder, + int mem_region, unsigned int mem_offset_dsp_words, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_wmfw_add_info(struct cs_dsp_mock_wmfw_builder *builder, + const char *info); +void cs_dsp_mock_wmfw_add_data_block(struct cs_dsp_mock_wmfw_builder *builder, + int mem_region, unsigned int mem_offset_dsp_words, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_wmfw_start_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder, + unsigned int alg_id, + const char *name, + const char *description); +void cs_dsp_mock_wmfw_add_coeff_desc(struct cs_dsp_mock_wmfw_builder *builder, + const struct cs_dsp_mock_coeff_def *def); +void cs_dsp_mock_wmfw_end_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder); +struct firmware *cs_dsp_mock_wmfw_get_firmware(struct cs_dsp_mock_wmfw_builder *builder); +int cs_dsp_mock_wmfw_format_version(struct cs_dsp_mock_wmfw_builder *builder); diff --git a/include/linux/firmware/imx/sm.h b/include/linux/firmware/imx/sm.h index 9b85a3f028d1..a8a17eeb7d90 100644 --- a/include/linux/firmware/imx/sm.h +++ b/include/linux/firmware/imx/sm.h @@ -8,6 +8,7 @@ #include <linux/bitfield.h> #include <linux/errno.h> +#include <linux/scmi_imx_protocol.h> #include <linux/types.h> #define SCMI_IMX_CTRL_PDM_CLK_SEL 0 /* AON PDM clock sel */ @@ -20,4 +21,22 @@ int scmi_imx_misc_ctrl_get(u32 id, u32 *num, u32 *val); int scmi_imx_misc_ctrl_set(u32 id, u32 val); +int scmi_imx_cpu_start(u32 cpuid, bool start); +int scmi_imx_cpu_started(u32 cpuid, bool *started); +int scmi_imx_cpu_reset_vector_set(u32 cpuid, u64 vector, bool start, bool boot, + bool resume); + +enum scmi_imx_lmm_op { + SCMI_IMX_LMM_BOOT, + SCMI_IMX_LMM_POWER_ON, + SCMI_IMX_LMM_SHUTDOWN, +}; + +/* For shutdown pperation */ +#define SCMI_IMX_LMM_OP_FORCEFUL 0 +#define SCMI_IMX_LMM_OP_GRACEFUL BIT(0) + +int scmi_imx_lmm_operation(u32 lmid, enum scmi_imx_lmm_op op, u32 flags); +int scmi_imx_lmm_info(u32 lmid, struct scmi_imx_lmm_info *info); +int scmi_imx_lmm_reset_vector_set(u32 lmid, u32 cpuid, u32 flags, u64 vector); #endif diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 4621aec0328c..983e1591bbba 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -105,6 +105,14 @@ bool qcom_scm_ice_available(void); int qcom_scm_ice_invalidate_key(u32 index); int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, enum qcom_scm_ice_cipher cipher, u32 data_unit_size); +bool qcom_scm_has_wrapped_key_support(void); +int qcom_scm_derive_sw_secret(const u8 *eph_key, size_t eph_key_size, + u8 *sw_secret, size_t sw_secret_size); +int qcom_scm_generate_ice_key(u8 *lt_key, size_t lt_key_size); +int qcom_scm_prepare_ice_key(const u8 *lt_key, size_t lt_key_size, + u8 *eph_key, size_t eph_key_size); +int qcom_scm_import_ice_key(const u8 *raw_key, size_t raw_key_size, + u8 *lt_key, size_t lt_key_size); bool qcom_scm_hdcp_available(void); int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); diff --git a/include/linux/firmware/samsung/exynos-acpm-protocol.h b/include/linux/firmware/samsung/exynos-acpm-protocol.h new file mode 100644 index 000000000000..f628bf1862c2 --- /dev/null +++ b/include/linux/firmware/samsung/exynos-acpm-protocol.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2020 Samsung Electronics Co., Ltd. + * Copyright 2020 Google LLC. + * Copyright 2024 Linaro Ltd. + */ + +#ifndef __EXYNOS_ACPM_PROTOCOL_H +#define __EXYNOS_ACPM_PROTOCOL_H + +#include <linux/types.h> + +struct acpm_handle; +struct device_node; + +struct acpm_pmic_ops { + int (*read_reg)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 *buf); + int (*bulk_read)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, u8 *buf); + int (*write_reg)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 value); + int (*bulk_write)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 count, const u8 *buf); + int (*update_reg)(const struct acpm_handle *handle, + unsigned int acpm_chan_id, u8 type, u8 reg, u8 chan, + u8 value, u8 mask); +}; + +struct acpm_ops { + struct acpm_pmic_ops pmic_ops; +}; + +/** + * struct acpm_handle - Reference to an initialized protocol instance + * @ops: + */ +struct acpm_handle { + struct acpm_ops ops; +}; + +struct device; + +const struct acpm_handle *devm_acpm_get_by_node(struct device *dev, + struct device_node *np); + +#endif /* __EXYNOS_ACPM_PROTOCOL_H */ diff --git a/include/linux/firmware/thead/thead,th1520-aon.h b/include/linux/firmware/thead/thead,th1520-aon.h new file mode 100644 index 000000000000..dae132b66873 --- /dev/null +++ b/include/linux/firmware/thead/thead,th1520-aon.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 Alibaba Group Holding Limited. + */ + +#ifndef _THEAD_AON_H +#define _THEAD_AON_H + +#include <linux/device.h> +#include <linux/types.h> + +#define AON_RPC_MSG_MAGIC (0xef) +#define TH1520_AON_RPC_VERSION 2 +#define TH1520_AON_RPC_MSG_NUM 7 + +struct th1520_aon_chan; + +enum th1520_aon_rpc_svc { + TH1520_AON_RPC_SVC_UNKNOWN = 0, + TH1520_AON_RPC_SVC_PM = 1, + TH1520_AON_RPC_SVC_MISC = 2, + TH1520_AON_RPC_SVC_AVFS = 3, + TH1520_AON_RPC_SVC_SYS = 4, + TH1520_AON_RPC_SVC_WDG = 5, + TH1520_AON_RPC_SVC_LPM = 6, + TH1520_AON_RPC_SVC_MAX = 0x3F, +}; + +enum th1520_aon_misc_func { + TH1520_AON_MISC_FUNC_UNKNOWN = 0, + TH1520_AON_MISC_FUNC_SET_CONTROL = 1, + TH1520_AON_MISC_FUNC_GET_CONTROL = 2, + TH1520_AON_MISC_FUNC_REGDUMP_CFG = 3, +}; + +enum th1520_aon_wdg_func { + TH1520_AON_WDG_FUNC_UNKNOWN = 0, + TH1520_AON_WDG_FUNC_START = 1, + TH1520_AON_WDG_FUNC_STOP = 2, + TH1520_AON_WDG_FUNC_PING = 3, + TH1520_AON_WDG_FUNC_TIMEOUTSET = 4, + TH1520_AON_WDG_FUNC_RESTART = 5, + TH1520_AON_WDG_FUNC_GET_STATE = 6, + TH1520_AON_WDG_FUNC_POWER_OFF = 7, + TH1520_AON_WDG_FUNC_AON_WDT_ON = 8, + TH1520_AON_WDG_FUNC_AON_WDT_OFF = 9, +}; + +enum th1520_aon_sys_func { + TH1520_AON_SYS_FUNC_UNKNOWN = 0, + TH1520_AON_SYS_FUNC_AON_RESERVE_MEM = 1, +}; + +enum th1520_aon_lpm_func { + TH1520_AON_LPM_FUNC_UNKNOWN = 0, + TH1520_AON_LPM_FUNC_REQUIRE_STR = 1, + TH1520_AON_LPM_FUNC_RESUME_STR = 2, + TH1520_AON_LPM_FUNC_REQUIRE_STD = 3, + TH1520_AON_LPM_FUNC_CPUHP = 4, + TH1520_AON_LPM_FUNC_REGDUMP_CFG = 5, +}; + +enum th1520_aon_pm_func { + TH1520_AON_PM_FUNC_UNKNOWN = 0, + TH1520_AON_PM_FUNC_SET_RESOURCE_REGULATOR = 1, + TH1520_AON_PM_FUNC_GET_RESOURCE_REGULATOR = 2, + TH1520_AON_PM_FUNC_SET_RESOURCE_POWER_MODE = 3, + TH1520_AON_PM_FUNC_PWR_SET = 4, + TH1520_AON_PM_FUNC_PWR_GET = 5, + TH1520_AON_PM_FUNC_CHECK_FAULT = 6, + TH1520_AON_PM_FUNC_GET_TEMPERATURE = 7, +}; + +struct th1520_aon_rpc_msg_hdr { + u8 ver; /* version of msg hdr */ + u8 size; /* msg size ,uinit in bytes,the size includes rpc msg header self */ + u8 svc; /* rpc main service id */ + u8 func; /* rpc sub func id of specific service, sent by caller */ +} __packed __aligned(1); + +struct th1520_aon_rpc_ack_common { + struct th1520_aon_rpc_msg_hdr hdr; + u8 err_code; +} __packed __aligned(1); + +#define RPC_SVC_MSG_TYPE_DATA 0 +#define RPC_SVC_MSG_TYPE_ACK 1 +#define RPC_SVC_MSG_NEED_ACK 0 +#define RPC_SVC_MSG_NO_NEED_ACK 1 + +#define RPC_GET_VER(MESG) ((MESG)->ver) +#define RPC_SET_VER(MESG, VER) ((MESG)->ver = (VER)) +#define RPC_GET_SVC_ID(MESG) ((MESG)->svc & 0x3F) +#define RPC_SET_SVC_ID(MESG, ID) ((MESG)->svc |= 0x3F & (ID)) +#define RPC_GET_SVC_FLAG_MSG_TYPE(MESG) (((MESG)->svc & 0x80) >> 7) +#define RPC_SET_SVC_FLAG_MSG_TYPE(MESG, TYPE) ((MESG)->svc |= (TYPE) << 7) +#define RPC_GET_SVC_FLAG_ACK_TYPE(MESG) (((MESG)->svc & 0x40) >> 6) +#define RPC_SET_SVC_FLAG_ACK_TYPE(MESG, ACK) ((MESG)->svc |= (ACK) << 6) + +#define RPC_SET_BE64(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + u64 _set_data = (SET_DATA); \ + data[_offset + 7] = _set_data & 0xFF; \ + data[_offset + 6] = (_set_data & 0xFF00) >> 8; \ + data[_offset + 5] = (_set_data & 0xFF0000) >> 16; \ + data[_offset + 4] = (_set_data & 0xFF000000) >> 24; \ + data[_offset + 3] = (_set_data & 0xFF00000000) >> 32; \ + data[_offset + 2] = (_set_data & 0xFF0000000000) >> 40; \ + data[_offset + 1] = (_set_data & 0xFF000000000000) >> 48; \ + data[_offset + 0] = (_set_data & 0xFF00000000000000) >> 56; \ + } while (0) + +#define RPC_SET_BE32(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + u64 _set_data = (SET_DATA); \ + data[_offset + 3] = (_set_data) & 0xFF; \ + data[_offset + 2] = (_set_data & 0xFF00) >> 8; \ + data[_offset + 1] = (_set_data & 0xFF0000) >> 16; \ + data[_offset + 0] = (_set_data & 0xFF000000) >> 24; \ + } while (0) + +#define RPC_SET_BE16(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + u64 _set_data = (SET_DATA); \ + data[_offset + 1] = (_set_data) & 0xFF; \ + data[_offset + 0] = (_set_data & 0xFF00) >> 8; \ + } while (0) + +#define RPC_SET_U8(MESG, OFFSET, SET_DATA) \ + do { \ + u8 *data = (u8 *)(MESG); \ + data[OFFSET] = (SET_DATA) & 0xFF; \ + } while (0) + +#define RPC_GET_BE64(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + *(u32 *)(PTR) = \ + (data[_offset + 7] | data[_offset + 6] << 8 | \ + data[_offset + 5] << 16 | data[_offset + 4] << 24 | \ + data[_offset + 3] << 32 | data[_offset + 2] << 40 | \ + data[_offset + 1] << 48 | data[_offset + 0] << 56); \ + } while (0) + +#define RPC_GET_BE32(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + *(u32 *)(PTR) = \ + (data[_offset + 3] | data[_offset + 2] << 8 | \ + data[_offset + 1] << 16 | data[_offset + 0] << 24); \ + } while (0) + +#define RPC_GET_BE16(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + u64 _offset = (OFFSET); \ + *(u16 *)(PTR) = (data[_offset + 1] | data[_offset + 0] << 8); \ + } while (0) + +#define RPC_GET_U8(MESG, OFFSET, PTR) \ + do { \ + u8 *data = (u8 *)(MESG); \ + *(u8 *)(PTR) = (data[OFFSET]); \ + } while (0) + +/* + * Defines for SC PM Power Mode + */ +#define TH1520_AON_PM_PW_MODE_OFF 0 /* Power off */ +#define TH1520_AON_PM_PW_MODE_STBY 1 /* Power in standby */ +#define TH1520_AON_PM_PW_MODE_LP 2 /* Power in low-power */ +#define TH1520_AON_PM_PW_MODE_ON 3 /* Power on */ + +/* + * Defines for AON power islands + */ +#define TH1520_AON_AUDIO_PD 0 +#define TH1520_AON_VDEC_PD 1 +#define TH1520_AON_NPU_PD 2 +#define TH1520_AON_VENC_PD 3 +#define TH1520_AON_GPU_PD 4 +#define TH1520_AON_DSP0_PD 5 +#define TH1520_AON_DSP1_PD 6 + +struct th1520_aon_chan *th1520_aon_init(struct device *dev); +void th1520_aon_deinit(struct th1520_aon_chan *aon_chan); + +int th1520_aon_call_rpc(struct th1520_aon_chan *aon_chan, void *msg); +int th1520_aon_power_update(struct th1520_aon_chan *aon_chan, u16 rsrc, + bool power_on); + +#endif /* _THEAD_AON_H */ diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 3abe614ef5f0..adab609c972e 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -15,6 +15,7 @@ #define _LINUX_FOLIO_QUEUE_H #include <linux/pagevec.h> +#include <linux/mm.h> /* * Segment in a queue of running buffers. Each segment can hold a number of @@ -33,27 +34,31 @@ struct folio_queue { struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ - unsigned long marks3; /* Third 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif + unsigned int rreq_id; + unsigned int debug_id; }; /** * folioq_init - Initialise a folio queue segment * @folioq: The segment to initialise + * @rreq_id: The request identifier to use in tracelines. * - * Initialise a folio queue segment. Note that the folio pointers are - * left uninitialised. + * Initialise a folio queue segment and set an identifier to be used in traces. + * + * Note that the folio pointers are left uninitialised. */ -static inline void folioq_init(struct folio_queue *folioq) +static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) { folio_batch_init(&folioq->vec); folioq->next = NULL; folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; - folioq->marks3 = 0; + folioq->rreq_id = rreq_id; + folioq->debug_id = 0; } /** @@ -172,52 +177,6 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) } /** - * folioq_is_marked3: Check third folio mark in a folio queue segment - * @folioq: The segment to query - * @slot: The slot number of the folio to query - * - * Determine if the third mark is set for the folio in the specified slot in a - * folio queue segment. - */ -static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) -{ - return test_bit(slot, &folioq->marks3); -} - -/** - * folioq_mark3: Set the third mark on a folio in a folio queue segment - * @folioq: The segment to modify - * @slot: The slot number of the folio to modify - * - * Set the third mark for the folio in the specified slot in a folio queue - * segment. - */ -static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) -{ - set_bit(slot, &folioq->marks3); -} - -/** - * folioq_unmark3: Clear the third mark on a folio in a folio queue segment - * @folioq: The segment to modify - * @slot: The slot number of the folio to modify - * - * Clear the third mark for the folio in the specified slot in a folio queue - * segment. - */ -static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) -{ - clear_bit(slot, &folioq->marks3); -} - -static inline unsigned int __folio_order(struct folio *folio) -{ - if (!folio_test_large(folio)) - return 0; - return folio->_flags_1 & 0xff; -} - -/** * folioq_append: Add a folio to a folio queue segment * @folioq: The segment to add to * @folio: The folio to add @@ -235,7 +194,7 @@ static inline unsigned int folioq_append(struct folio_queue *folioq, struct foli unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; - folioq->orders[slot] = __folio_order(folio); + folioq->orders[slot] = folio_order(folio); return slot; } @@ -257,7 +216,7 @@ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct unsigned int slot = folioq->vec.nr++; folioq->vec.folios[slot] = folio; - folioq->orders[slot] = __folio_order(folio); + folioq->orders[slot] = folio_order(folio); folioq_mark(folioq, slot); return slot; } @@ -318,7 +277,6 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); - folioq_unmark3(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index e4ce1cae03bf..b3b53f8c1b28 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -596,7 +596,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, if (p_size != SIZE_MAX && p_size < size) fortify_panic(func, FORTIFY_WRITE, p_size, size, true); else if (q_size != SIZE_MAX && q_size < size) - fortify_panic(func, FORTIFY_READ, p_size, size, true); + fortify_panic(func, FORTIFY_READ, q_size, size, true); /* * Warn when writing beyond destination field size. diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h index f39869588117..702099f08929 100644 --- a/include/linux/fprobe.h +++ b/include/linux/fprobe.h @@ -5,47 +5,68 @@ #include <linux/compiler.h> #include <linux/ftrace.h> -#include <linux/rethook.h> +#include <linux/rcupdate.h> +#include <linux/refcount.h> +#include <linux/slab.h> struct fprobe; - typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *regs, void *entry_data); typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *regs, void *entry_data); /** + * struct fprobe_hlist_node - address based hash list node for fprobe. + * + * @hlist: The hlist node for address search hash table. + * @addr: One of the probing address of @fp. + * @fp: The fprobe which owns this. + */ +struct fprobe_hlist_node { + struct hlist_node hlist; + unsigned long addr; + struct fprobe *fp; +}; + +/** + * struct fprobe_hlist - hash list nodes for fprobe. + * + * @hlist: The hlist node for existence checking hash table. + * @rcu: rcu_head for RCU deferred release. + * @fp: The fprobe which owns this fprobe_hlist. + * @size: The size of @array. + * @array: The fprobe_hlist_node for each address to probe. + */ +struct fprobe_hlist { + struct hlist_node hlist; + struct rcu_head rcu; + struct fprobe *fp; + int size; + struct fprobe_hlist_node array[] __counted_by(size); +}; + +/** * struct fprobe - ftrace based probe. - * @ops: The ftrace_ops. + * * @nmissed: The counter for missing events. * @flags: The status flag. - * @rethook: The rethook data structure. (internal data) * @entry_data_size: The private data storage size. - * @nr_maxactive: The max number of active functions. * @entry_handler: The callback function for function entry. * @exit_handler: The callback function for function exit. + * @hlist_array: The fprobe_hlist for fprobe search from IP hash table. */ struct fprobe { -#ifdef CONFIG_FUNCTION_TRACER - /* - * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too. - * But user of fprobe may keep embedding the struct fprobe on their own - * code. To avoid build error, this will keep the fprobe data structure - * defined here, but remove ftrace_ops data structure. - */ - struct ftrace_ops ops; -#endif unsigned long nmissed; unsigned int flags; - struct rethook *rethook; size_t entry_data_size; - int nr_maxactive; fprobe_entry_cb entry_handler; fprobe_exit_cb exit_handler; + + struct fprobe_hlist *hlist_array; }; /* This fprobe is soft-disabled. */ @@ -121,4 +142,9 @@ static inline void enable_fprobe(struct fprobe *fp) fp->flags &= ~FPROBE_FL_DISABLED; } +/* The entry data size is 4 bits (=16) * sizeof(long) in maximum */ +#define FPROBE_DATA_SIZE_BITS 4 +#define MAX_FPROBE_DATA_SIZE_WORD ((1L << FPROBE_DATA_SIZE_BITS) - 1) +#define MAX_FPROBE_DATA_SIZE (MAX_FPROBE_DATA_SIZE_WORD * sizeof(long)) + #endif diff --git a/include/linux/fs.h b/include/linux/fs.h index f7efc6866ebc..040c0036320f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2,6 +2,7 @@ #ifndef _LINUX_FS_H #define _LINUX_FS_H +#include <linux/vfsdebug.h> #include <linux/linkage.h> #include <linux/wait_bit.h> #include <linux/kdev_t.h> @@ -173,13 +174,20 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) -/* FMODE_* bit 24 */ - /* File is embedded in backing_file object */ -#define FMODE_BACKING ((__force fmode_t)(1 << 25)) +#define FMODE_BACKING ((__force fmode_t)(1 << 24)) -/* File was opened by fanotify and shouldn't generate fanotify events */ -#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) +/* + * Together with FMODE_NONOTIFY_PERM defines which fsnotify events shouldn't be + * generated (see below) + */ +#define FMODE_NONOTIFY ((__force fmode_t)(1 << 25)) + +/* + * Together with FMODE_NONOTIFY defines which fsnotify events shouldn't be + * generated (see below) + */ +#define FMODE_NONOTIFY_PERM ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) @@ -191,6 +199,31 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) /* + * The two FMODE_NONOTIFY* define which fsnotify events should not be generated + * for a file. These are the possible values of (f->f_mode & + * FMODE_FSNOTIFY_MASK) and their meaning: + * + * FMODE_NONOTIFY - suppress all (incl. non-permission) events. + * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events. + * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only pre-content events. + */ +#define FMODE_FSNOTIFY_MASK \ + (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM) + +#define FMODE_FSNOTIFY_NONE(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == FMODE_NONOTIFY) +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +#define FMODE_FSNOTIFY_PERM(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == 0 || \ + (mode & FMODE_FSNOTIFY_MASK) == (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM)) +#define FMODE_FSNOTIFY_HSM(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == 0) +#else +#define FMODE_FSNOTIFY_PERM(mode) 0 +#define FMODE_FSNOTIFY_HSM(mode) 0 +#endif + +/* * Attribute flags. These should be or-ed together to figure out what * has been changed! */ @@ -322,6 +355,7 @@ struct readahead_control; #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND #define IOCB_ATOMIC (__force int) RWF_ATOMIC +#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) @@ -348,6 +382,7 @@ struct readahead_control; #define IOCB_DIO_CALLER_COMP (1 << 22) /* kiocb is a read or write operation submitted by fs/aio.c. */ #define IOCB_AIO_RW (1 << 23) +#define IOCB_HAS_METADATA (1 << 24) /* for use in trace events */ #define TRACE_IOCB_STRINGS \ @@ -356,14 +391,17 @@ struct readahead_control; { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ - { IOCB_ATOMIC, "ATOMIC"}, \ + { IOCB_ATOMIC, "ATOMIC" }, \ + { IOCB_DONTCACHE, "DONTCACHE" }, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ - { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ + { IOCB_AIO_RW, "AIO_RW" }, \ + { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; @@ -372,6 +410,7 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ + u8 ki_write_stream; union { /* * Only used for async buffered reads, where it denotes the @@ -397,7 +436,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) } struct address_space_operations { - int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. */ @@ -626,6 +664,7 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 #define IOP_MGTIME 0x0020 +#define IOP_CACHED_LINK 0x0040 /* * Keep mostly read-only and often accessed (especially for @@ -723,7 +762,10 @@ struct inode { }; struct file_lock_context *i_flctx; struct address_space i_data; - struct list_head i_devices; + union { + struct list_head i_devices; + int i_linklen; + }; union { struct pipe_inode_info *i_pipe; struct cdev *i_cdev; @@ -749,6 +791,15 @@ struct inode { void *i_private; /* fs or device private pointer */ } __randomize_layout; +static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) +{ + VFS_WARN_ON_INODE(strlen(link) != linklen, inode); + VFS_WARN_ON_INODE(inode->i_opflags & IOP_CACHED_LINK, inode); + inode->i_link = link; + inode->i_linklen = linklen; + inode->i_opflags |= IOP_CACHED_LINK; +} + /* * Get bit address from inode->i_state to use with wait_var_event() * infrastructre. @@ -818,6 +869,11 @@ static inline void inode_lock(struct inode *inode) down_write(&inode->i_rwsem); } +static inline __must_check int inode_lock_killable(struct inode *inode) +{ + return down_write_killable(&inode->i_rwsem); +} + static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); @@ -828,6 +884,11 @@ static inline void inode_lock_shared(struct inode *inode) down_read(&inode->i_rwsem); } +static inline __must_check int inode_lock_shared_killable(struct inode *inode) +{ + return down_read_killable(&inode->i_rwsem); +} + static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); @@ -1008,7 +1069,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) /** * struct file - Represents a file - * @f_ref: reference count * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context. * @f_mode: FMODE_* flags often used in hotpaths * @f_op: file operations @@ -1018,12 +1078,12 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_flags: file flags * @f_iocb_flags: iocb flags * @f_cred: stashed credentials of creator/opener + * @f_owner: file owner * @f_path: path of the file * @f_pos_lock: lock protecting file position * @f_pipe: specific to pipes * @f_pos: file position * @f_security: LSM security context of this file - * @f_owner: file owner * @f_wb_err: writeback error * @f_sb_err: per sb writeback errors * @f_ep: link of all epoll hooks for this file @@ -1031,9 +1091,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) * @f_llist: work queue entrypoint * @f_ra: file's readahead state * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.) + * @f_ref: reference count */ struct file { - file_ref_t f_ref; spinlock_t f_lock; fmode_t f_mode; const struct file_operations *f_op; @@ -1043,6 +1103,7 @@ struct file { unsigned int f_flags; unsigned int f_iocb_flags; const struct cred *f_cred; + struct fown_struct *f_owner; /* --- cacheline 1 boundary (64 bytes) --- */ struct path f_path; union { @@ -1056,7 +1117,6 @@ struct file { void *f_security; #endif /* --- cacheline 2 boundary (128 bytes) --- */ - struct fown_struct *f_owner; errseq_t f_wb_err; errseq_t f_sb_err; #ifdef CONFIG_EPOLL @@ -1068,6 +1128,7 @@ struct file { struct file_ra_state f_ra; freeptr_t f_freeptr; }; + file_ref_t f_ref; /* --- cacheline 3 boundary (192 bytes) --- */ } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ @@ -1191,7 +1252,6 @@ extern int send_sigurg(struct file *file); /* These sb flags are internal to the kernel */ #define SB_DEAD BIT(21) #define SB_DYING BIT(24) -#define SB_SUBMOUNT BIT(26) #define SB_FORCE BIT(27) #define SB_NOSEC BIT(28) #define SB_BORN BIT(29) @@ -1199,11 +1259,19 @@ extern int send_sigurg(struct file *file); #define SB_NOUSER BIT(31) /* These flags relate to encoding and casefolding */ -#define SB_ENC_STRICT_MODE_FL (1 << 0) +#define SB_ENC_STRICT_MODE_FL (1 << 0) +#define SB_ENC_NO_COMPAT_FALLBACK_FL (1 << 1) #define sb_has_strict_encoding(sb) \ (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) +#if IS_ENABLED(CONFIG_UNICODE) +#define sb_no_casefold_compat_fallback(sb) \ + (sb->s_encoding_flags & SB_ENC_NO_COMPAT_FALLBACK_FL) +#else +#define sb_no_casefold_compat_fallback(sb) (1) +#endif + /* * Umount options */ @@ -1232,6 +1300,7 @@ extern int send_sigurg(struct file *file); #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ +#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */ /* Possible states of 'frozen' field */ enum { @@ -1249,6 +1318,7 @@ struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests? */ + const void *freeze_owner; /* Owner of the freeze */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -1722,7 +1792,7 @@ static inline void __sb_end_write(struct super_block *sb, int level) static inline void __sb_start_write(struct super_block *sb, int level) { - percpu_down_read(sb->s_writers.rw_sem + level - 1); + percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) @@ -1921,8 +1991,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, */ int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); -int vfs_mkdir(struct mnt_idmap *, struct inode *, - struct dentry *, umode_t); +struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, + struct dentry *, umode_t); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *, @@ -1979,7 +2049,7 @@ int vfs_fchown(struct file *file, uid_t user, gid_t group); int vfs_fchmod(struct file *file, umode_t mode); int vfs_utimes(const struct path *path, struct timespec64 *times); -extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT extern long compat_ptr_ioctl(struct file *file, unsigned int cmd, @@ -2013,8 +2083,18 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, struct dir_context { filldir_t actor; loff_t pos; + /* + * Filesystems MUST NOT MODIFY count, but may use as a hint: + * 0 unknown + * > 0 space in buffer (assume at least one entry) + * INT_MAX unlimited + */ + int count; }; +/* If OR-ed with d_type, pending signals are not checked */ +#define FILLDIR_FLAG_NOINTR 0x1000 + /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. @@ -2111,6 +2191,7 @@ struct file_operations { int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags); int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *, unsigned int poll_flags); + int (*mmap_prepare)(struct vm_area_desc *); } __randomize_layout; /* Supports async buffered reads */ @@ -2127,6 +2208,8 @@ struct file_operations { #define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5)) /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) +/* File system supports uncached read/write buffered IO */ +#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, @@ -2149,8 +2232,8 @@ struct inode_operations { int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *, const char *); - int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *, - umode_t); + struct dentry *(*mkdir) (struct mnt_idmap *, struct inode *, + struct dentry *, umode_t); int (*rmdir) (struct inode *,struct dentry *); int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *, umode_t,dev_t); @@ -2178,11 +2261,37 @@ struct inode_operations { struct offset_ctx *(*get_offset_ctx)(struct inode *inode); } ____cacheline_aligned; +/* Did the driver provide valid mmap hook configuration? */ +static inline bool file_has_valid_mmap_hooks(struct file *file) +{ + bool has_mmap = file->f_op->mmap; + bool has_mmap_prepare = file->f_op->mmap_prepare; + + /* Hooks are mutually exclusive. */ + if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) + return false; + if (!has_mmap && !has_mmap_prepare) + return false; + + return true; +} + +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); + return file->f_op->mmap(file, vma); } +static inline int __call_mmap_prepare(struct file *file, + struct vm_area_desc *desc) +{ + return file->f_op->mmap_prepare(desc); +} + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, @@ -2209,6 +2318,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed + * @FREEZE_EXCL: a freeze that can only be undone by the owner * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. @@ -2222,6 +2332,7 @@ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), + FREEZE_EXCL = (1U << 3), }; struct super_operations { @@ -2235,9 +2346,9 @@ struct super_operations { void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - int (*freeze_super) (struct super_block *, enum freeze_holder who); + int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*freeze_fs) (struct super_block *); - int (*thaw_super) (struct super_block *, enum freeze_holder who); + int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); @@ -2284,6 +2395,7 @@ struct super_operations { #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ +#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2340,6 +2452,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) @@ -2554,6 +2667,7 @@ struct file_system_type { #define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */ #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ #define FS_MGTIME 64 /* FS uses multigrain timestamps */ +#define FS_LBS 128 /* FS supports LBS */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -2591,9 +2705,6 @@ static inline bool is_mgtime(const struct inode *inode) extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -extern struct dentry *mount_single(struct file_system_type *fs_type, - int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2647,8 +2758,10 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -int freeze_super(struct super_block *super, enum freeze_holder who); -int thaw_super(struct super_block *super, enum freeze_holder who); +int freeze_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); +int thaw_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); @@ -2732,13 +2845,13 @@ static inline bool is_idmapped_mnt(const struct vfsmount *mnt) return mnt_idmap(mnt) != &nop_mnt_idmap; } -extern long vfs_truncate(const struct path *, loff_t); +int vfs_truncate(const struct path *, loff_t); int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); extern int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len); -extern long do_sys_open(int dfd, const char __user *filename, int flags, - umode_t mode); +int do_sys_open(int dfd, const char __user *filename, int flags, + umode_t mode); extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(const struct path *, @@ -2751,6 +2864,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt, } struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); @@ -2787,7 +2902,10 @@ extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); -extern struct filename *getname(const char __user *); +static inline struct filename *getname(const char __user *name) +{ + return getname_flags(name, 0); +} extern struct filename *getname_kernel(const char *); extern struct filename *__getname_maybe_null(const char __user *); static inline struct filename *getname_maybe_null(const char __user *name, int flags) @@ -2800,6 +2918,13 @@ static inline struct filename *getname_maybe_null(const char __user *name, int f return __getname_maybe_null(name); } extern void putname(struct filename *name); +DEFINE_FREE(putname, struct filename *, if (!IS_ERR_OR_NULL(_T)) putname(_T)) + +static inline struct filename *refname(struct filename *name) +{ + atomic_inc(&name->refcnt); + return name; +} extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *)); @@ -2874,6 +2999,8 @@ extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart, extern int __must_check file_check_and_advance_wb_err(struct file *file); extern int __must_check file_write_and_wait_range(struct file *file, loff_t start, loff_t end); +int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start, + loff_t end); static inline int file_write_and_wait(struct file *file) { @@ -2906,6 +3033,11 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) (iocb->ki_flags & IOCB_SYNC) ? 0 : 1); if (ret) return ret; + } else if (iocb->ki_flags & IOCB_DONTCACHE) { + struct address_space *mapping = iocb->ki_filp->f_mapping; + + filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, + iocb->ki_pos - 1); } return count; @@ -3059,6 +3191,34 @@ static inline void allow_write_access(struct file *file) if (file) atomic_inc(&file_inode(file)->i_writecount); } + +/* + * Do not prevent write to executable file when watched by pre-content events. + * + * Note that FMODE_FSNOTIFY_HSM mode is set depending on pre-content watches at + * the time of file open and remains constant for entire lifetime of the file, + * so if pre-content watches are added post execution or removed before the end + * of the execution, it will not cause i_writecount reference leak. + */ +static inline int exe_file_deny_write_access(struct file *exe_file) +{ + if (unlikely(FMODE_FSNOTIFY_HSM(exe_file->f_mode))) + return 0; + return deny_write_access(exe_file); +} +static inline void exe_file_allow_write_access(struct file *exe_file) +{ + if (unlikely(!exe_file || FMODE_FSNOTIFY_HSM(exe_file->f_mode))) + return; + allow_write_access(exe_file); +} + +static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode) +{ + file->f_mode &= ~FMODE_FSNOTIFY_MASK; + file->f_mode |= mode; +} + static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; @@ -3198,7 +3358,11 @@ static inline void __iget(struct inode *inode) extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void __destroy_inode(struct inode *); -extern struct inode *new_inode_pseudo(struct super_block *sb); +struct inode *alloc_inode(struct super_block *sb); +static inline struct inode *new_inode_pseudo(struct super_block *sb) +{ + return alloc_inode(sb); +} extern struct inode *new_inode(struct super_block *sb); extern void free_inode_nonrcu(struct inode *inode); extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *); @@ -3351,8 +3515,10 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -extern int readlink_copy(char __user *, int, const char *); +extern int readlink_copy(char __user *, int, const char *, int); extern int page_readlink(struct dentry *, char __user *, int); +extern const char *page_get_link_raw(struct dentry *, struct inode *, + struct delayed_call *); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); @@ -3364,7 +3530,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, - unsigned int unit_max); + unsigned int unit_max, + unsigned int unit_max_opt); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); @@ -3404,9 +3571,11 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -extern void iterate_supers(void (*)(struct super_block *, void *), void *); +extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); +void filesystems_freeze(void); +void filesystems_thaw(void); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); @@ -3439,6 +3608,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping, extern const struct address_space_operations ram_aops; extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name, + const struct inode *context_inode); extern int simple_nosetlease(struct file *, int, struct file_lease **, void **); extern const struct dentry_operations simple_dentry_operations; @@ -3613,6 +3784,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE)) return -EOPNOTSUPP; } + if (flags & RWF_DONTCACHE) { + /* file system must support it */ + if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE)) + return -EOPNOTSUPP; + /* DAX mappings not supported */ + if (IS_DAX(ki->ki_filp->f_mapping->host)) + return -EOPNOTSUPP; + } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; @@ -3706,11 +3885,9 @@ struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) -#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) -#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ - (flag & __FMODE_NONOTIFY))) +#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) static inline bool is_sxid(umode_t mode) { diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 4b4bfef6f053..7773eb870039 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -144,8 +144,6 @@ extern void put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); extern void fc_drop_locked(struct fs_context *fc); -int reconfigure_single(struct super_block *s, - int flags, void *data); extern int get_tree_nodev(struct fs_context *fc, int (*fill_super)(struct super_block *sb, @@ -202,7 +200,7 @@ void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, */ #define infof(fc, fmt, ...) __logfc(fc, 'i', fmt, ## __VA_ARGS__) #define info_plog(p, fmt, ...) __plog(p, 'i', fmt, ## __VA_ARGS__) -#define infofc(p, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) +#define infofc(fc, fmt, ...) __plog((&(fc)->log), 'i', fmt, ## __VA_ARGS__) /** * warnf - Store supplementary warning message diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 3cef566088fc..5a0e897cae80 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -84,15 +84,12 @@ extern int fs_lookup_param(struct fs_context *fc, extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); +extern const struct constant_table bool_names[]; + #ifdef CONFIG_VALIDATE_FS_PARSER -extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special); extern bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc); #else -static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special) -{ return true; } static inline bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) { return true; } @@ -123,8 +120,6 @@ static inline bool fs_validate_description(const char *name, #define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0, NULL) #define fsparam_u32oct(NAME, OPT) \ __fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8) -#define fsparam_u32hex(NAME, OPT) \ - __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16) #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL) #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL) #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607f..58fdb9605425 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -498,9 +498,6 @@ static inline void fscache_end_operation(struct netfs_cache_resources *cres) * * NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read). * - * NETFS_READ_HOLE_CLEAR - Seek for data, clearing the part of the buffer - * skipped over, then do as for IGNORE. - * * NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole. */ static inline @@ -628,7 +625,7 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, term_func, term_func_priv, using_pgpriv2, caching); else if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); } diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 772f822dc6b8..56fad33043d5 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -192,7 +192,8 @@ struct fscrypt_operations { unsigned int *num_devs); }; -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); +int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags); static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) @@ -309,10 +310,8 @@ static inline void fscrypt_prepare_dentry(struct dentry *dentry, /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); -struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, - unsigned int len, - unsigned int offs, - gfp_t gfp_flags); +struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, + size_t len, size_t offs, gfp_t gfp_flags); int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags); @@ -479,10 +478,8 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } -static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, - unsigned int len, - unsigned int offs, - gfp_t gfp_flags) +static inline struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, + size_t len, size_t offs, gfp_t gfp_flags) { return ERR_PTR(-EOPNOTSUPP); } @@ -711,8 +708,8 @@ static inline u64 fscrypt_fname_siphash(const struct inode *dir, return 0; } -static inline int fscrypt_d_revalidate(struct dentry *dentry, - unsigned int flags) +static inline int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { return 1; } diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index c90ec889bfc2..897d6211c163 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -417,8 +417,6 @@ int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev, void fsl_mc_portal_free(struct fsl_mc_io *mc_io); -int fsl_mc_portal_reset(struct fsl_mc_io *mc_io); - int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev, enum fsl_mc_pool_type pool_type, struct fsl_mc_device **new_mc_adev); @@ -438,21 +436,21 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, extern const struct bus_type fsl_mc_bus_type; -extern struct device_type fsl_mc_bus_dprc_type; -extern struct device_type fsl_mc_bus_dpni_type; -extern struct device_type fsl_mc_bus_dpio_type; -extern struct device_type fsl_mc_bus_dpsw_type; -extern struct device_type fsl_mc_bus_dpbp_type; -extern struct device_type fsl_mc_bus_dpcon_type; -extern struct device_type fsl_mc_bus_dpmcp_type; -extern struct device_type fsl_mc_bus_dpmac_type; -extern struct device_type fsl_mc_bus_dprtc_type; -extern struct device_type fsl_mc_bus_dpseci_type; -extern struct device_type fsl_mc_bus_dpdmux_type; -extern struct device_type fsl_mc_bus_dpdcei_type; -extern struct device_type fsl_mc_bus_dpaiop_type; -extern struct device_type fsl_mc_bus_dpci_type; -extern struct device_type fsl_mc_bus_dpdmai_type; +extern const struct device_type fsl_mc_bus_dprc_type; +extern const struct device_type fsl_mc_bus_dpni_type; +extern const struct device_type fsl_mc_bus_dpio_type; +extern const struct device_type fsl_mc_bus_dpsw_type; +extern const struct device_type fsl_mc_bus_dpbp_type; +extern const struct device_type fsl_mc_bus_dpcon_type; +extern const struct device_type fsl_mc_bus_dpmcp_type; +extern const struct device_type fsl_mc_bus_dpmac_type; +extern const struct device_type fsl_mc_bus_dprtc_type; +extern const struct device_type fsl_mc_bus_dpseci_type; +extern const struct device_type fsl_mc_bus_dpdmux_type; +extern const struct device_type fsl_mc_bus_dpdcei_type; +extern const struct device_type fsl_mc_bus_dpaiop_type; +extern const struct device_type fsl_mc_bus_dpci_type; +extern const struct device_type fsl_mc_bus_dpdmai_type; static inline bool is_fsl_mc_bus_dprc(const struct fsl_mc_device *mc_dev) { diff --git a/include/linux/fsl/ntmp.h b/include/linux/fsl/ntmp.h new file mode 100644 index 000000000000..916dc4fe7de3 --- /dev/null +++ b/include/linux/fsl/ntmp.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* Copyright 2025 NXP */ +#ifndef __NETC_NTMP_H +#define __NETC_NTMP_H + +#include <linux/bitops.h> +#include <linux/if_ether.h> + +struct maft_keye_data { + u8 mac_addr[ETH_ALEN]; + __le16 resv; +}; + +struct maft_cfge_data { + __le16 si_bitmap; + __le16 resv; +}; + +struct netc_cbdr_regs { + void __iomem *pir; + void __iomem *cir; + void __iomem *mr; + + void __iomem *bar0; + void __iomem *bar1; + void __iomem *lenr; +}; + +struct netc_tbl_vers { + u8 maft_ver; + u8 rsst_ver; +}; + +struct netc_cbdr { + struct device *dev; + struct netc_cbdr_regs regs; + + int bd_num; + int next_to_use; + int next_to_clean; + + int dma_size; + void *addr_base; + void *addr_base_align; + dma_addr_t dma_base; + dma_addr_t dma_base_align; + + /* Serialize the order of command BD ring */ + spinlock_t ring_lock; +}; + +struct ntmp_user { + int cbdr_num; /* number of control BD ring */ + struct device *dev; + struct netc_cbdr *ring; + struct netc_tbl_vers tbl; +}; + +struct maft_entry_data { + struct maft_keye_data keye; + struct maft_cfge_data cfge; +}; + +#if IS_ENABLED(CONFIG_NXP_NETC_LIB) +int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, + const struct netc_cbdr_regs *regs); +void ntmp_free_cbdr(struct netc_cbdr *cbdr); + +/* NTMP APIs */ +int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft); +int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft); +int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id); +int ntmp_rsst_update_entry(struct ntmp_user *user, const u32 *table, + int count); +int ntmp_rsst_query_entry(struct ntmp_user *user, + u32 *table, int count); +#else +static inline int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, + const struct netc_cbdr_regs *regs) +{ + return 0; +} + +static inline void ntmp_free_cbdr(struct netc_cbdr *cbdr) +{ +} + +static inline int ntmp_maft_add_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + return 0; +} + +static inline int ntmp_maft_query_entry(struct ntmp_user *user, u32 entry_id, + struct maft_entry_data *maft) +{ + return 0; +} + +static inline int ntmp_maft_delete_entry(struct ntmp_user *user, u32 entry_id) +{ + return 0; +} + +static inline int ntmp_rsst_update_entry(struct ntmp_user *user, + const u32 *table, int count) +{ + return 0; +} + +static inline int ntmp_rsst_query_entry(struct ntmp_user *user, + u32 *table, int count) +{ + return 0; +} + +#endif + +#endif diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 278620e063ab..454d8e466958 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -108,38 +108,35 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); } -static inline int fsnotify_file(struct file *file, __u32 mask) +static inline int fsnotify_path(const struct path *path, __u32 mask) { - const struct path *path; + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); +} +static inline int fsnotify_file(struct file *file, __u32 mask) +{ /* * FMODE_NONOTIFY are fds generated by fanotify itself which should not * generate new events. We also don't want to generate events for * FMODE_PATH fds (involves open & close events) as they are just * handle creation / destruction events and not "real" file events. */ - if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) + if (FMODE_FSNOTIFY_NONE(file->f_mode)) return 0; - path = &file->f_path; - /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ - if (mask & ALL_FSNOTIFY_PERM_EVENTS && - !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, - FSNOTIFY_PRIO_CONTENT)) - return 0; - - return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); + return fsnotify_path(&file->f_path, mask); } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + +void file_set_fsnotify_mode_from_watchers(struct file *file); + /* * fsnotify_file_area_perm - permission hook before access to file range */ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { - __u32 fsnotify_mask = FS_ACCESS_PERM; - /* * filesystem may be modified in the context of permission events * (e.g. by HSM filling a file on access), so sb freeze protection @@ -147,14 +144,64 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, */ lockdep_assert_once(file_write_not_started(file)); + if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS))) + return 0; + + if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) + return 0; + + /* + * read()/write() and other types of access generate pre-content events. + */ + if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { + int ret = fsnotify_pre_content(&file->f_path, ppos, count); + + if (ret) + return ret; + } + if (!(perm_mask & MAY_READ)) return 0; - return fsnotify_file(file, fsnotify_mask); + /* + * read() also generates the legacy FS_ACCESS_PERM event, so content + * scanners can inspect the content filled by pre-content event. + */ + return fsnotify_path(&file->f_path, FS_ACCESS_PERM); +} + +/* + * fsnotify_mmap_perm - permission hook before mmap of file range + */ +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + /* + * mmap() generates only pre-content events. + */ + if (!file || likely(!FMODE_FSNOTIFY_HSM(file->f_mode))) + return 0; + + return fsnotify_pre_content(&file->f_path, &off, len); } /* - * fsnotify_file_perm - permission hook before file access + * fsnotify_truncate_perm - permission hook before file truncate + */ +static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) +{ + struct inode *inode = d_inode(path->dentry); + + if (!(inode->i_sb->s_iflags & SB_I_ALLOW_HSM) || + !fsnotify_sb_has_priority_watchers(inode->i_sb, + FSNOTIFY_PRIO_PRE_CONTENT)) + return 0; + + return fsnotify_pre_content(path, &length, 0); +} + +/* + * fsnotify_file_perm - permission hook before file access (unknown range) */ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { @@ -168,22 +215,40 @@ static inline int fsnotify_open_perm(struct file *file) { int ret; + if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) + return 0; + if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); + ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); if (ret) return ret; } - return fsnotify_file(file, FS_OPEN_PERM); + return fsnotify_path(&file->f_path, FS_OPEN_PERM); } #else +static inline void file_set_fsnotify_mode_from_watchers(struct file *file) +{ +} + static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { return 0; } +static inline int fsnotify_mmap_perm(struct file *file, int prot, + const loff_t off, size_t len) +{ + return 0; +} + +static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) +{ + return 0; +} + static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return 0; @@ -255,6 +320,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) __fsnotify_vfsmount_delete(mnt); } +static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + __fsnotify_mntns_delete(mntns); +} + /* * fsnotify_inoderemove - an inode is going away */ @@ -463,4 +533,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, NULL, NULL, NULL, 0); } +static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); +} + +static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_DETACH, ns, mnt); +} + +static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_MOVE, ns, mnt); +} + #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 3ecf7768e577..d4034ddaf392 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -55,6 +55,13 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +/* #define FS_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */ + +#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FS_MNT_DETACH 0x02000000 /* Mount was detached */ +#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH) /* * Set on inode mark that cares about things that happen to its children. @@ -77,8 +84,17 @@ */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) -#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC_PERM) +/* Mount namespace events */ +#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH) + +/* Content events can be used to inspect file content */ +#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \ + FS_ACCESS_PERM) +/* Pre-content events can be used to fill file content */ +#define FSNOTIFY_PRE_CONTENT_EVENTS (FS_PRE_ACCESS) + +#define ALL_FSNOTIFY_PERM_EVENTS (FSNOTIFY_CONTENT_PERM_EVENTS | \ + FSNOTIFY_PRE_CONTENT_EVENTS) /* * This is a list of all events that may get sent to a parent that is watching @@ -99,6 +115,7 @@ /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ + FSNOTIFY_MNT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ @@ -233,6 +250,7 @@ struct fsnotify_group { * full */ struct mem_cgroup *memcg; /* memcg to charge allocations */ + struct user_namespace *user_ns; /* user ns where group was created */ /* groups can define private fields here or use the void *private */ union { @@ -285,9 +303,11 @@ static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, + FSNOTIFY_EVENT_FILE_RANGE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, FSNOTIFY_EVENT_DENTRY, + FSNOTIFY_EVENT_MNT, FSNOTIFY_EVENT_ERROR, }; @@ -297,6 +317,22 @@ struct fs_error_report { struct super_block *sb; }; +struct file_range { + const struct path *path; + loff_t pos; + size_t count; +}; + +static inline const struct path *file_range_path(const struct file_range *range) +{ + return range->path; +} + +struct fsnotify_mnt { + const struct mnt_namespace *ns; + u64 mnt_id; +}; + static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { @@ -306,6 +342,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type) return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); + case FSNOTIFY_EVENT_FILE_RANGE: + return d_inode(file_range_path(data)->dentry); case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *)data)->inode; default: @@ -321,6 +359,8 @@ static inline struct dentry *fsnotify_data_dentry(const void *data, int data_typ return (struct dentry *)data; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data)->dentry; default: return NULL; } @@ -332,6 +372,8 @@ static inline const struct path *fsnotify_data_path(const void *data, switch (data_type) { case FSNOTIFY_EVENT_PATH: return data; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data); default: return NULL; } @@ -347,6 +389,8 @@ static inline struct super_block *fsnotify_data_sb(const void *data, return ((struct dentry *)data)->d_sb; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry->d_sb; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data)->dentry->d_sb; case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *) data)->sb; default: @@ -354,6 +398,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data, } } +static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_MNT: + return data; + default: + return NULL; + } +} + +static inline u64 fsnotify_data_mnt_id(const void *data, int data_type) +{ + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + + return mnt_data ? mnt_data->mnt_id : 0; +} + static inline struct fs_error_report *fsnotify_data_error_report( const void *data, int data_type) @@ -366,6 +428,18 @@ static inline struct fs_error_report *fsnotify_data_error_report( } } +static inline const struct file_range *fsnotify_data_file_range( + const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_FILE_RANGE: + return (struct file_range *)data; + default: + return NULL; + } +} + /* * Index to merged marks iterator array that correlates to a type of watch. * The type of watched object can be deduced from the iterator type, but not @@ -379,6 +453,7 @@ enum fsnotify_iter_type { FSNOTIFY_ITER_TYPE_SB, FSNOTIFY_ITER_TYPE_PARENT, FSNOTIFY_ITER_TYPE_INODE2, + FSNOTIFY_ITER_TYPE_MNTNS, FSNOTIFY_ITER_TYPE_COUNT }; @@ -388,6 +463,7 @@ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, + FSNOTIFY_OBJ_TYPE_MNTNS, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; @@ -572,8 +648,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); +extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns); extern void fsnotify_sb_free(struct super_block *sb); extern u32 fsnotify_get_cookie(void); +extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) { @@ -830,21 +908,6 @@ extern void fsnotify_wait_marks_destroyed(void); /* Clear all of the marks of a group attached to a given object type */ extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int obj_type); -/* run all the marks in a group, and clear all of the vfsmount marks */ -static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); -} -/* run all the marks in a group, and clear all of the inode marks */ -static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); -} -/* run all the marks in a group, and clear all of the sn marks */ -static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB); -} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); @@ -854,9 +917,17 @@ static inline void fsnotify_init_event(struct fsnotify_event *event) { INIT_LIST_HEAD(&event->list); } +int fsnotify_pre_content(const struct path *path, const loff_t *ppos, + size_t count); #else +static inline int fsnotify_pre_content(const struct path *path, + const loff_t *ppos, size_t count) +{ + return 0; +} + static inline int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie) @@ -879,6 +950,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} +static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{} + static inline void fsnotify_sb_free(struct super_block *sb) {} @@ -893,6 +967,9 @@ static inline u32 fsnotify_get_cookie(void) static inline void fsnotify_unmount_inodes(struct super_block *sb) {} +static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index aa9ddd1e4bb6..b672ca15f265 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,8 @@ struct dyn_ftrace; char *arch_ftrace_match_adjust(char *str, const char *search); -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL -struct fgraph_ret_regs; -unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs); +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS +unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs); #else unsigned long ftrace_return_to_handler(unsigned long frame_pointer); #endif @@ -134,6 +133,13 @@ extern int ftrace_enabled; * Also, architecture dependent fields can be used for internal process. * (e.g. orig_ax on x86_64) * + * Basically, ftrace_regs stores the registers related to the context. + * On function entry, registers for function parameters and hooking the + * function call are stored, and on function exit, registers for function + * return value and frame pointers are stored. + * + * And also, it dpends on the context that which registers are restored + * from the ftrace_regs. * On the function entry, those registers will be restored except for * the stack pointer, so that user can change the function parameters * and instruction pointer (e.g. live patching.) @@ -170,6 +176,12 @@ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ +#ifdef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS + +static_assert(sizeof(struct pt_regs) == ftrace_regs_size()); + +#endif /* CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ + static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) { if (!fregs) @@ -178,6 +190,54 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs return arch_ftrace_get_regs(fregs); } +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \ + defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS) + +static __always_inline struct pt_regs * +ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + /* + * If CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS=y, ftrace_regs memory + * layout is including pt_regs. So always returns that address. + * Since arch_ftrace_get_regs() will check some members and may return + * NULL, we can not use it. + */ + return &arch_ftrace_regs(fregs)->regs; +} + +#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + +/* + * Please define arch dependent pt_regs which compatible to the + * perf_arch_fetch_caller_regs() but based on ftrace_regs. + * This requires + * - user_mode(_regs) returns false (always kernel mode). + * - able to use the _regs for stack trace. + */ +#ifndef arch_ftrace_fill_perf_regs +/* As same as perf_arch_fetch_caller_regs(), do nothing by default */ +#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0) +#endif + +static __always_inline struct pt_regs * +ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + arch_ftrace_fill_perf_regs(fregs, regs); + return regs; +} + +#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ + +static __always_inline struct pt_regs * +ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs) +{ + return &arch_ftrace_regs(fregs)->regs; +} + +#endif + /* * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs. * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs. @@ -190,6 +250,23 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs) return ftrace_get_regs(fregs) != NULL; } +#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API +static __always_inline unsigned long +ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth) +{ + unsigned long *stackp; + + stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs); + if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) == + ((unsigned long)stackp & ~(THREAD_SIZE - 1))) + return *(stackp + nth); + + return 0; +} +#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */ +#define ftrace_regs_get_kernel_stack_nth(fregs, nth) (0L) +#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */ + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); @@ -251,6 +328,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * DIRECT - Used by the direct ftrace_ops helper for direct functions * (internal ftrace only, should not be used by others) * SUBOP - Is controlled by another op in field managed. + * GRAPH - Is a component of the fgraph_ops structure */ enum { FTRACE_OPS_FL_ENABLED = BIT(0), @@ -272,6 +350,7 @@ enum { FTRACE_OPS_FL_PERMANENT = BIT(16), FTRACE_OPS_FL_DIRECT = BIT(17), FTRACE_OPS_FL_SUBOP = BIT(18), + FTRACE_OPS_FL_GRAPH = BIT(19), }; #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -492,8 +571,6 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, #ifdef CONFIG_STACK_TRACER -extern int stack_tracer_enabled; - int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); @@ -545,6 +622,21 @@ enum { FTRACE_MAY_SLEEP = (1 << 5), }; +/* Arches can override ftrace_get_symaddr() to convert fentry_ip to symaddr. */ +#ifndef ftrace_get_symaddr +/** + * ftrace_get_symaddr - return the symbol address from fentry_ip + * @fentry_ip: the address of ftrace location + * + * Get the symbol address from @fentry_ip (fast path). If there is no fast + * search path, this returns 0. + * User may need to use kallsyms API to find the symbol address. + */ +#define ftrace_get_symaddr(fentry_ip) (0) +#endif + +void ftrace_sync_ipi(void *data); + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void); @@ -1061,20 +1153,21 @@ struct ftrace_graph_ret { int depth; /* Number of functions that overran the depth limit for current task */ unsigned int overrun; - unsigned long long calltime; - unsigned long long rettime; } __packed; struct fgraph_ops; /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, - struct fgraph_ops *); /* return */ + struct fgraph_ops *, + struct ftrace_regs *); /* return */ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, - struct fgraph_ops *); /* entry */ + struct fgraph_ops *, + struct ftrace_regs *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, - struct fgraph_ops *gops); + struct fgraph_ops *gops, + struct ftrace_regs *fregs); bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -1114,8 +1207,15 @@ struct ftrace_ret_stack { extern void return_to_handler(void); extern int -function_graph_enter(unsigned long ret, unsigned long func, - unsigned long frame_pointer, unsigned long *retp); +function_graph_enter_regs(unsigned long ret, unsigned long func, + unsigned long frame_pointer, unsigned long *retp, + struct ftrace_regs *fregs); + +static inline int function_graph_enter(unsigned long ret, unsigned long func, + unsigned long fp, unsigned long *retp) +{ + return function_graph_enter_regs(ret, func, fp, retp, NULL); +} struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int skip); @@ -1200,16 +1300,9 @@ static inline void unpause_graph_tracing(void) { } #ifdef CONFIG_TRACING enum ftrace_dump_mode; -#define MAX_TRACER_SIZE 100 -extern char ftrace_dump_on_oops[]; extern int ftrace_dump_on_oops_enabled(void); -extern int tracepoint_printk; extern void disable_trace_on_warning(void); -extern int __disable_trace_on_warning; - -int tracepoint_printk_sysctl(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); #else /* CONFIG_TRACING */ static inline void disable_trace_on_warning(void) { } diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h index be1ed0c891d0..15627ceea9bc 100644 --- a/include/linux/ftrace_regs.h +++ b/include/linux/ftrace_regs.h @@ -30,7 +30,14 @@ struct ftrace_regs; override_function_with_return(&arch_ftrace_regs(fregs)->regs) #define ftrace_regs_query_register_offset(name) \ regs_query_register_offset(name) +#define ftrace_regs_get_frame_pointer(fregs) \ + frame_pointer(&arch_ftrace_regs(fregs)->regs) #endif /* HAVE_ARCH_FTRACE_REGS */ +/* This can be overridden by the architectures */ +#ifndef FTRACE_REGS_MAX_ARGS +# define FTRACE_REGS_MAX_ARGS 6 +#endif + #endif /* _LINUX_FTRACE_REGS_H */ diff --git a/include/linux/futex.h b/include/linux/futex.h index b70df27d7e85..b37193653e6b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,11 @@ #include <linux/sched.h> #include <linux/ktime.h> +#include <linux/mm_types.h> #include <uapi/linux/futex.h> struct inode; -struct mm_struct; struct task_struct; /* @@ -34,6 +34,7 @@ union futex_key { u64 i_seq; unsigned long pgoff; unsigned int offset; + /* unsigned int node; */ } shared; struct { union { @@ -42,11 +43,13 @@ union futex_key { }; unsigned long address; unsigned int offset; + /* unsigned int node; */ } private; struct { u64 ptr; unsigned long word; unsigned int offset; + unsigned int node; /* NOT hashed! */ } both; }; @@ -77,7 +80,26 @@ void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -#else +int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4); + +#ifdef CONFIG_FUTEX_PRIVATE_HASH +int futex_hash_allocate_default(void); +void futex_hash_free(struct mm_struct *mm); + +static inline void futex_mm_init(struct mm_struct *mm) +{ + RCU_INIT_POINTER(mm->futex_phash, NULL); + mm->futex_phash_new = NULL; + mutex_init(&mm->futex_hash_lock); +} + +#else /* !CONFIG_FUTEX_PRIVATE_HASH */ +static inline int futex_hash_allocate_default(void) { return 0; } +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } +#endif /* CONFIG_FUTEX_PRIVATE_HASH */ + +#else /* !CONFIG_FUTEX */ static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } @@ -88,6 +110,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, { return -EINVAL; } +static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + return -EINVAL; +} +static inline int futex_hash_allocate_default(void) +{ + return 0; +} +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } + #endif #endif diff --git a/include/linux/fwctl.h b/include/linux/fwctl.h new file mode 100644 index 000000000000..5d61fc8a6871 --- /dev/null +++ b/include/linux/fwctl.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __LINUX_FWCTL_H +#define __LINUX_FWCTL_H +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/cleanup.h> +#include <uapi/fwctl/fwctl.h> + +struct fwctl_device; +struct fwctl_uctx; + +/** + * struct fwctl_ops - Driver provided operations + * + * fwctl_unregister() will wait until all excuting ops are completed before it + * returns. Drivers should be mindful to not let their ops run for too long as + * it will block device hot unplug and module unloading. + */ +struct fwctl_ops { + /** + * @device_type: The drivers assigned device_type number. This is uABI. + */ + enum fwctl_device_type device_type; + /** + * @uctx_size: The size of the fwctl_uctx struct to allocate. The first + * bytes of this memory will be a fwctl_uctx. The driver can use the + * remaining bytes as its private memory. + */ + size_t uctx_size; + /** + * @open_uctx: Called when a file descriptor is opened before the uctx + * is ever used. + */ + int (*open_uctx)(struct fwctl_uctx *uctx); + /** + * @close_uctx: Called when the uctx is destroyed, usually when the FD + * is closed. + */ + void (*close_uctx)(struct fwctl_uctx *uctx); + /** + * @info: Implement FWCTL_INFO. Return a kmalloc() memory that is copied + * to out_device_data. On input length indicates the size of the user + * buffer on output it indicates the size of the memory. The driver can + * ignore length on input, the core code will handle everything. + */ + void *(*info)(struct fwctl_uctx *uctx, size_t *length); + /** + * @fw_rpc: Implement FWCTL_RPC. Deliver rpc_in/in_len to the FW and + * return the response and set out_len. rpc_in can be returned as the + * response pointer. Otherwise the returned pointer is freed with + * kvfree(). + */ + void *(*fw_rpc)(struct fwctl_uctx *uctx, enum fwctl_rpc_scope scope, + void *rpc_in, size_t in_len, size_t *out_len); +}; + +/** + * struct fwctl_device - Per-driver registration struct + * @dev: The sysfs (class/fwctl/fwctlXX) device + * + * Each driver instance will have one of these structs with the driver private + * data following immediately after. This struct is refcounted, it is freed by + * calling fwctl_put(). + */ +struct fwctl_device { + struct device dev; + /* private: */ + struct cdev cdev; + + /* Protect uctx_list */ + struct mutex uctx_list_lock; + struct list_head uctx_list; + /* + * Protect ops, held for write when ops becomes NULL during unregister, + * held for read whenever ops is loaded or an ops function is running. + */ + struct rw_semaphore registration_lock; + const struct fwctl_ops *ops; +}; + +struct fwctl_device *_fwctl_alloc_device(struct device *parent, + const struct fwctl_ops *ops, + size_t size); +/** + * fwctl_alloc_device - Allocate a fwctl + * @parent: Physical device that provides the FW interface + * @ops: Driver ops to register + * @drv_struct: 'struct driver_fwctl' that holds the struct fwctl_device + * @member: Name of the struct fwctl_device in @drv_struct + * + * This allocates and initializes the fwctl_device embedded in the drv_struct. + * Upon success the pointer must be freed via fwctl_put(). Returns a 'drv_struct + * \*' on success, NULL on error. + */ +#define fwctl_alloc_device(parent, ops, drv_struct, member) \ + ({ \ + static_assert(__same_type(struct fwctl_device, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_fwctl_alloc_device(parent, ops, \ + sizeof(drv_struct)); \ + }) + +static inline struct fwctl_device *fwctl_get(struct fwctl_device *fwctl) +{ + get_device(&fwctl->dev); + return fwctl; +} +static inline void fwctl_put(struct fwctl_device *fwctl) +{ + put_device(&fwctl->dev); +} +DEFINE_FREE(fwctl, struct fwctl_device *, if (_T) fwctl_put(_T)); + +int fwctl_register(struct fwctl_device *fwctl); +void fwctl_unregister(struct fwctl_device *fwctl); + +/** + * struct fwctl_uctx - Per user FD context + * @fwctl: fwctl instance that owns the context + * + * Every FD opened by userspace will get a unique context allocation. Any driver + * private data will follow immediately after. + */ +struct fwctl_uctx { + struct fwctl_device *fwctl; + /* private: */ + /* Head at fwctl_device::uctx_list */ + struct list_head uctx_list_entry; +}; + +#endif diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 0d79070c5a70..097be89487bf 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -2,6 +2,11 @@ /* * fwnode.h - Firmware device node object handle type definition. * + * This header file provides low-level data types and definitions for firmware + * and device property providers. The respective API header files supplied by + * them should contain all of the requisite data types and definitions for end + * users, so including it directly should not be necessary. + * * Copyright (C) 2015, Intel Corporation * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> */ @@ -91,7 +96,7 @@ struct fwnode_endpoint { #define SWNODE_GRAPH_PORT_NAME_FMT "port@%u" #define SWNODE_GRAPH_ENDPOINT_NAME_FMT "endpoint@%u" -#define NR_FWNODE_REFERENCE_ARGS 8 +#define NR_FWNODE_REFERENCE_ARGS 16 /** * struct fwnode_reference_args - Fwnode reference with additional arguments @@ -112,6 +117,7 @@ struct fwnode_reference_args { * @device_is_available: Return true if the device is available. * @device_get_match_data: Return the device driver match data. * @property_present: Return true if a property is present. + * @property_read_bool: Return a boolean property value. * @property_read_int_array: Read an array of integer properties. Return zero on * success, a negative error code otherwise. * @property_read_string_array: Read an array of string properties. Return zero @@ -141,6 +147,8 @@ struct fwnode_operations { (*device_get_dma_attr)(const struct fwnode_handle *fwnode); bool (*property_present)(const struct fwnode_handle *fwnode, const char *propname); + bool (*property_read_bool)(const struct fwnode_handle *fwnode, + const char *propname); int (*property_read_int_array)(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b0fe9f62d15b..be160e8d8bcb 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,6 +39,25 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } +static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags) +{ + /* + * !__GFP_DIRECT_RECLAIM -> direct claim is not allowed. + * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd. + * All GFP_* flags including GFP_NOWAIT use one or both flags. + * alloc_pages_nolock() is the only API that doesn't specify either flag. + * + * This is stronger than GFP_NOWAIT or GFP_ATOMIC because + * those are guaranteed to never block on a sleeping lock. + * Here we are enforcing that the allocation doesn't ever spin + * on any locks (i.e. only trylocks). There is no high level + * GFP_$FOO flag for this use in alloc_pages_nolock() as the + * regular page allocator doesn't fully support this + * allocation mode. + */ + return !!(gfp_flags & __GFP_RECLAIM); +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else @@ -212,35 +231,31 @@ struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, - struct list_head *page_list, struct page **page_array); #define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__)) -unsigned long alloc_pages_bulk_array_mempolicy_noprof(gfp_t gfp, +unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array); -#define alloc_pages_bulk_array_mempolicy(...) \ - alloc_hooks(alloc_pages_bulk_array_mempolicy_noprof(__VA_ARGS__)) +#define alloc_pages_bulk_mempolicy(...) \ + alloc_hooks(alloc_pages_bulk_mempolicy_noprof(__VA_ARGS__)) /* Bulk allocate order-0 pages */ -#define alloc_pages_bulk_list(_gfp, _nr_pages, _list) \ - __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL) - -#define alloc_pages_bulk_array(_gfp, _nr_pages, _page_array) \ - __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array) +#define alloc_pages_bulk(_gfp, _nr_pages, _page_array) \ + __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _page_array) static inline unsigned long -alloc_pages_bulk_array_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, +alloc_pages_bulk_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); - return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array); + return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, page_array); } -#define alloc_pages_bulk_array_node(...) \ - alloc_hooks(alloc_pages_bulk_array_node_noprof(__VA_ARGS__)) +#define alloc_pages_bulk_node(...) \ + alloc_hooks(alloc_pages_bulk_node_noprof(__VA_ARGS__)) static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) { @@ -300,8 +315,6 @@ static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask, #ifdef CONFIG_NUMA struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order); -struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, - struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); @@ -312,11 +325,6 @@ static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order { return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order); } -static inline struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, - struct mempolicy *mpol, pgoff_t ilx, int nid) -{ - return alloc_pages_noprof(gfp, order); -} static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return __folio_alloc_node_noprof(gfp, order, numa_node_id()); @@ -331,7 +339,6 @@ static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int orde #endif #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) -#define alloc_pages_mpol(...) alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__)) #define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__)) #define folio_alloc_mpol(...) alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__)) #define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__)) @@ -347,6 +354,9 @@ static inline struct page *alloc_page_vma_noprof(gfp_t gfp, } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) +struct page *alloc_pages_nolock_noprof(int nid, unsigned int order); +#define alloc_pages_nolock(...) alloc_hooks(alloc_pages_nolock_noprof(__VA_ARGS__)) + extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); #define __get_free_pages(...) alloc_hooks(get_free_pages_noprof(__VA_ARGS__)) @@ -369,6 +379,7 @@ __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mas __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); +extern void free_pages_nolock(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); #define __free_page(page) __free_pages((page), 0) diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 6270150f4e29..c1ec62c11ed3 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -91,7 +91,7 @@ static inline int gpio_get_value_cansleep(unsigned gpio) } static inline void gpio_set_value_cansleep(unsigned gpio, int value) { - return gpiod_set_raw_value_cansleep(gpio_to_desc(gpio), value); + gpiod_set_raw_value_cansleep(gpio_to_desc(gpio), value); } static inline int gpio_get_value(unsigned gpio) @@ -100,7 +100,7 @@ static inline int gpio_get_value(unsigned gpio) } static inline void gpio_set_value(unsigned gpio, int value) { - return gpiod_set_raw_value(gpio_to_desc(gpio), value); + gpiod_set_raw_value(gpio_to_desc(gpio), value); } static inline int gpio_to_irq(unsigned gpio) diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index db2dfbae8edb..f0b1982da0cc 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -3,6 +3,7 @@ #define __LINUX_GPIO_CONSUMER_H #include <linux/bits.h> +#include <linux/err.h> #include <linux/types.h> struct acpi_device; @@ -30,6 +31,7 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) #define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3) +/* GPIOD_FLAGS_BIT_NONEXCLUSIVE is DEPRECATED, don't use in new code. */ #define GPIOD_FLAGS_BIT_NONEXCLUSIVE BIT(4) /** @@ -110,8 +112,6 @@ int gpiod_get_direction(struct gpio_desc *desc); int gpiod_direction_input(struct gpio_desc *desc); int gpiod_direction_output(struct gpio_desc *desc, int value); int gpiod_direction_output_raw(struct gpio_desc *desc, int value); -int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); -int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); /* Value get/set from non-sleeping context */ int gpiod_get_value(const struct gpio_desc *desc); @@ -119,7 +119,7 @@ int gpiod_get_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_value(struct gpio_desc *desc, int value); +int gpiod_set_value(struct gpio_desc *desc, int value); int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ -129,7 +129,7 @@ int gpiod_get_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_raw_value(struct gpio_desc *desc, int value); +int gpiod_set_raw_value(struct gpio_desc *desc, int value); int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ -141,7 +141,7 @@ int gpiod_get_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_value_cansleep(struct gpio_desc *desc, int value); +int gpiod_set_value_cansleep(struct gpio_desc *desc, int value); int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ -151,7 +151,7 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap); -void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); +int gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, @@ -181,13 +181,13 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, enum gpiod_flags flags, const char *label); +bool gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other); + #else /* CONFIG_GPIOLIB */ -#include <linux/err.h> +#include <linux/bug.h> #include <linux/kernel.h> -#include <asm/bug.h> - static inline int gpiod_count(struct device *dev, const char *con_id) { return 0; @@ -348,18 +348,6 @@ static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value) WARN_ON(desc); return -ENOSYS; } -static inline int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, - unsigned long flags) -{ - WARN_ON(desc); - return -ENOSYS; -} -static inline int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, - unsigned long flags) -{ - WARN_ON(desc); - return -ENOSYS; -} static inline int gpiod_get_value(const struct gpio_desc *desc) { /* GPIO can never have been requested */ @@ -375,10 +363,11 @@ static inline int gpiod_get_array_value(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_value(struct gpio_desc *desc, int value) +static inline int gpiod_set_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -404,10 +393,11 @@ static inline int gpiod_get_raw_array_value(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) +static inline int gpiod_set_raw_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -434,10 +424,11 @@ static inline int gpiod_get_array_value_cansleep(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) +static inline int gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -463,11 +454,12 @@ static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size, WARN_ON(desc_array); return 0; } -static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, - int value) +static inline int gpiod_set_raw_value_cansleep(struct gpio_desc *desc, + int value) { /* GPIO can never have been requested */ WARN_ON(desc); + return 0; } static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -558,8 +550,40 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, return ERR_PTR(-ENOSYS); } +static inline bool +gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other) +{ + WARN_ON(desc || other); + return false; +} + #endif /* CONFIG_GPIOLIB */ +#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_HTE) +int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); +int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags); +#else + +#include <linux/bug.h> + +static inline int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, + unsigned long flags) +{ + if (!IS_ENABLED(CONFIG_GPIOLIB)) + WARN_ON(desc); + + return -ENOSYS; +} +static inline int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, + unsigned long flags) +{ + if (!IS_ENABLED(CONFIG_GPIOLIB)) + WARN_ON(desc); + + return -ENOSYS; +} +#endif /* CONFIG_GPIOLIB && CONFIG_HTE */ + static inline struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, struct fwnode_handle *fwnode, @@ -573,7 +597,7 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, struct acpi_gpio_params { unsigned int crs_entry_index; - unsigned int line_index; + unsigned short line_index; bool active_low; }; @@ -608,8 +632,6 @@ int devm_acpi_dev_add_driver_gpios(struct device *dev, #else /* CONFIG_GPIOLIB && CONFIG_ACPI */ -#include <linux/err.h> - static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, const struct acpi_gpio_mapping *gpios) { @@ -635,8 +657,6 @@ void gpiod_unexport(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */ -#include <asm/errno.h> - static inline int gpiod_export(struct gpio_desc *desc, bool direction_may_change) { @@ -655,4 +675,14 @@ static inline void gpiod_unexport(struct gpio_desc *desc) #endif /* CONFIG_GPIOLIB && CONFIG_GPIO_SYSFS */ +static inline int gpiod_multi_set_value_cansleep(struct gpio_descs *descs, + unsigned long *value_bitmap) +{ + if (IS_ERR_OR_NULL(descs)) + return PTR_ERR_OR_ZERO(descs); + + return gpiod_set_array_value_cansleep(descs->ndescs, descs->desc, + descs->info, value_bitmap); +} + #endif diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 2dd7cb9cc270..b53233051bee 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -14,6 +14,7 @@ #include <linux/property.h> #include <linux/spinlock_types.h> #include <linux/types.h> +#include <linux/util_macros.h> #ifdef CONFIG_GENERIC_MSI_IRQ #include <asm/msi.h> @@ -286,8 +287,9 @@ struct gpio_irq_chip { /** * @first: * - * Required for static IRQ allocation. If set, irq_domain_add_simple() - * will allocate and map all IRQs during initialization. + * Required for static IRQ allocation. If set, + * irq_domain_create_simple() will allocate and map all IRQs + * during initialization. */ unsigned int first; @@ -328,7 +330,8 @@ struct gpio_irq_chip { * @fwnode: optional fwnode providing this controller's properties * @owner: helps prevent removal of modules exporting active GPIOs * @request: optional hook for chip-specific activation, such as - * enabling module power and clock; may sleep + * enabling module power and clock; may sleep; must return 0 on success + * or negative error number on failure * @free: optional hook for chip-specific deactivation, such as * disabling module power and clock; may sleep * @get_direction: returns direction for signal "offset", 0=out, 1=in, @@ -344,10 +347,15 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: assigns output value for signal "offset" - * @set_multiple: assigns output values for multiple signals defined by "mask" + * @set: **DEPRECATED** - please use set_rv() instead + * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead + * @set_rv: assigns output value for signal "offset", returns 0 on success or + * negative error value + * @set_multiple_rv: assigns output values for multiple signals defined by + * "mask", returns 0 on success or negative error value * @set_config: optional hook for all kinds of settings. Uses the same - * packed config format as generic pinconf. + * packed config format as generic pinconf. Must return 0 on success and + * a negative error number on failure. * @to_irq: optional hook supporting non-static gpiod_to_irq() mappings; * implementation may not sleep * @dbg_show: optional routine to show contents in debugfs; default code @@ -394,6 +402,7 @@ struct gpio_irq_chip { * @reg_dir_in: direction in setting register for generic GPIO * @bgpio_dir_unreadable: indicates that the direction register(s) cannot * be read and we need to rely on out internal state tracking. + * @bgpio_pinctrl: the generic GPIO uses a pin control backend. * @bgpio_bits: number of register bits used for a generic GPIO i.e. * <register width> * 8 * @bgpio_lock: used to lock chip->bgpio_data. Also, this is needed to keep @@ -441,6 +450,12 @@ struct gpio_chip { void (*set_multiple)(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits); + int (*set_rv)(struct gpio_chip *gc, + unsigned int offset, + int value); + int (*set_multiple_rv)(struct gpio_chip *gc, + unsigned long *mask, + unsigned long *bits); int (*set_config)(struct gpio_chip *gc, unsigned int offset, unsigned long config); @@ -478,6 +493,7 @@ struct gpio_chip { void __iomem *reg_dir_out; void __iomem *reg_dir_in; bool bgpio_dir_unreadable; + bool bgpio_pinctrl; int bgpio_bits; raw_spinlock_t bgpio_lock; unsigned long bgpio_data; @@ -499,14 +515,6 @@ struct gpio_chip { struct gpio_irq_chip irq; #endif /* CONFIG_GPIOLIB_IRQCHIP */ - /** - * @valid_mask: - * - * If not %NULL, holds bitmask of GPIOs which are valid to be used - * from the chip. - */ - unsigned long *valid_mask; - #if defined(CONFIG_OF_GPIO) /* * If CONFIG_OF_GPIO is enabled, then all GPIO controllers described in @@ -516,11 +524,33 @@ struct gpio_chip { /** * @of_gpio_n_cells: * - * Number of cells used to form the GPIO specifier. + * Number of cells used to form the GPIO specifier. The standard is 2 + * cells: + * + * gpios = <&gpio offset flags>; + * + * some complex GPIO controllers instantiate more than one chip per + * device tree node and have 3 cells: + * + * gpios = <&gpio instance offset flags>; + * + * Legacy GPIO controllers may even have 1 cell: + * + * gpios = <&gpio offset>; */ unsigned int of_gpio_n_cells; /** + * @of_node_instance_match: + * + * Determine if a chip is the right instance. Must be implemented by + * any driver using more than one gpio_chip per device tree node. + * Returns true if gc is the instance indicated by i (which is the + * first cell in the phandles for GPIO lines and gpio-ranges). + */ + bool (*of_node_instance_match)(struct gpio_chip *gc, unsigned int i); + + /** * @of_xlate: * * Callback to translate a device tree GPIO specifier into a chip- @@ -550,19 +580,29 @@ DEFINE_CLASS(_gpiochip_for_each_data, const char **label, int *i) /** + * for_each_hwgpio_in_range - Iterates over all GPIOs in a given range + * @_chip: Chip to iterate over. + * @_i: Loop counter. + * @_base: First GPIO in the ranger. + * @_size: Amount of GPIOs to check starting from @base. + * @_label: Place to store the address of the label if the GPIO is requested. + * Set to NULL for unused GPIOs. + */ +#define for_each_hwgpio_in_range(_chip, _i, _base, _size, _label) \ + for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ + _i < _size; \ + _i++, kfree(_label), _label = NULL) \ + for_each_if(!IS_ERR(_label = gpiochip_dup_line_label(_chip, _base + _i))) + +/** * for_each_hwgpio - Iterates over all GPIOs for given chip. * @_chip: Chip to iterate over. * @_i: Loop counter. * @_label: Place to store the address of the label if the GPIO is requested. * Set to NULL for unused GPIOs. */ -#define for_each_hwgpio(_chip, _i, _label) \ - for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ - *_data.i < _chip->ngpio; \ - (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ - if (IS_ERR(*_data.label = \ - gpiochip_dup_line_label(_chip, *_data.i))) {} \ - else +#define for_each_hwgpio(_chip, _i, _label) \ + for_each_hwgpio_in_range(_chip, _i, 0, _chip->ngpio, _label) /** * for_each_requested_gpio_in_range - iterates over requested GPIOs in a given range @@ -573,13 +613,8 @@ DEFINE_CLASS(_gpiochip_for_each_data, * @_label: label of current GPIO */ #define for_each_requested_gpio_in_range(_chip, _i, _base, _size, _label) \ - for (CLASS(_gpiochip_for_each_data, _data)(&_label, &_i); \ - *_data.i < _size; \ - (*_data.i)++, kfree(*(_data.label)), *_data.label = NULL) \ - if ((*_data.label = \ - gpiochip_dup_line_label(_chip, _base + *_data.i)) == NULL) {} \ - else if (IS_ERR(*_data.label)) {} \ - else + for_each_hwgpio_in_range(_chip, _i, _base, _size, _label) \ + for_each_if(_label) /* Iterates over all requested GPIO of the given @chip */ #define for_each_requested_gpio(chip, i, label) \ @@ -678,6 +713,7 @@ bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset); /* Sleep persistence inquiry for drivers */ bool gpiochip_line_is_persistent(struct gpio_chip *gc, unsigned int offset); bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset); +const unsigned long *gpiochip_query_valid_mask(const struct gpio_chip *gc); /* get driver data */ void *gpiochip_get_data(struct gpio_chip *gc); @@ -713,6 +749,7 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ #define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #define BGPIOF_NO_SET_ON_INPUT BIT(6) +#define BGPIOF_PINCTRL_BACKEND BIT(7) /* Call pinctrl direction setters */ #ifdef CONFIG_GPIOLIB_IRQCHIP int gpiochip_irqchip_add_domain(struct gpio_chip *gc, @@ -865,7 +902,7 @@ static inline void gpiochip_unlock_as_irq(struct gpio_chip *gc, #define for_each_gpiochip_node(dev, child) \ device_for_each_child_node(dev, child) \ - if (!fwnode_property_present(child, "gpio-controller")) {} else + for_each_if(fwnode_property_present(child, "gpio-controller")) static inline unsigned int gpiochip_node_count(struct device *dev) { diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index a9f7b7faf57b..c722c67668c6 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -21,7 +21,7 @@ struct regmap; * If not given, the fwnode of the parent is used. * @label: (Optional) Descriptive name for GPIO controller. * If not given, the name of the device is used. - * @ngpio: Number of GPIOs + * @ngpio: (Optional) Number of GPIOs * @names: (Optional) Array of names for gpios * @reg_dat_base: (Optional) (in) register base address * @reg_set_base: (Optional) set register base address @@ -30,7 +30,7 @@ struct regmap; * @reg_dir_out_base: (Optional) out setting register base address * @reg_stride: (Optional) May be set if the registers (of the * same type, dat, set, etc) are not consecutive. - * @ngpio_per_reg: Number of GPIOs per register + * @ngpio_per_reg: (Optional) Number of GPIOs per register * @irq_domain: (Optional) IRQ domain if the controller is * interrupt-capable * @reg_mask_xlate: (Optional) Translates base address and GPIO diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index d2a9fc96424b..af5fb4ad77eb 100644 --- a/include/linux/habanalabs/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -295,7 +295,7 @@ enum cpu_boot_dev_sts { * Initialized in: linux * * CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN GIC access permission only from - * previleged entity. FW sets this status + * privileged entity. FW sets this status * bit for host. If this bit is set then * GIC can not be accessed from host. * Initialized in: linux diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 455f855bc084..96bda41d9148 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -445,7 +445,6 @@ ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size); ssize_t hdmi_infoframe_pack_only(const union hdmi_infoframe *frame, void *buffer, size_t size); -int hdmi_infoframe_check(union hdmi_infoframe *frame); int hdmi_infoframe_unpack(union hdmi_infoframe *frame, const void *buffer, size_t size); void hdmi_infoframe_log(const char *level, struct device *dev, diff --git a/include/linux/hid-over-i2c.h b/include/linux/hid-over-i2c.h new file mode 100644 index 000000000000..3b1a0208a6b8 --- /dev/null +++ b/include/linux/hid-over-i2c.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2024 Intel Corporation */ + +#include <linux/bits.h> + +#ifndef _HID_OVER_I2C_H_ +#define _HID_OVER_I2C_H_ + +#define HIDI2C_REG_LEN sizeof(__le16) + +/* Input report type definition in HIDI2C protocol */ +enum hidi2c_report_type { + HIDI2C_RESERVED = 0, + HIDI2C_INPUT, + HIDI2C_OUTPUT, + HIDI2C_FEATURE, +}; + +/* Power state type definition in HIDI2C protocol */ +enum hidi2c_power_state { + HIDI2C_ON, + HIDI2C_SLEEP, +}; + +/* Opcode type definition in HIDI2C protocol */ +enum hidi2c_opcode { + HIDI2C_RESET = 1, + HIDI2C_GET_REPORT, + HIDI2C_SET_REPORT, + HIDI2C_GET_IDLE, + HIDI2C_SET_IDLE, + HIDI2C_GET_PROTOCOL, + HIDI2C_SET_PROTOCOL, + HIDI2C_SET_POWER, +}; + +/** + * struct hidi2c_report_packet - Report packet definition in HIDI2C protocol + * @len: data field length + * @data: HIDI2C report packet data + */ +struct hidi2c_report_packet { + __le16 len; + u8 data[]; +} __packed; + +#define HIDI2C_LENGTH_LEN sizeof(__le16) + +#define HIDI2C_PACKET_LEN(data_len) ((data_len) + HIDI2C_LENGTH_LEN) +#define HIDI2C_DATA_LEN(pkt_len) ((pkt_len) - HIDI2C_LENGTH_LEN) + +#define HIDI2C_CMD_MAX_RI 0x0F + +/** + * HIDI2C command data packet - Command packet definition in HIDI2C protocol + * @report_id: [0:3] report id (<15) for features or output reports + * @report_type: [4:5] indicate report type, reference to hidi2c_report_type + * @reserved0: [6:7] reserved bits + * @opcode: [8:11] command operation code, reference to hidi2c_opcode + * @reserved1: [12:15] reserved bits + * @report_id_optional: [23:16] appended 3rd byte. + * If the report_id in the low byte is set to the + * sentinel value (HIDI2C_CMD_MAX_RI), then this + * optional third byte represents the report id (>=15) + * Otherwise, not this 3rd byte. + */ + +#define HIDI2C_CMD_LEN sizeof(__le16) +#define HIDI2C_CMD_LEN_OPT (sizeof(__le16) + 1) +#define HIDI2C_CMD_REPORT_ID GENMASK(3, 0) +#define HIDI2C_CMD_REPORT_TYPE GENMASK(5, 4) +#define HIDI2C_CMD_OPCODE GENMASK(11, 8) +#define HIDI2C_CMD_OPCODE GENMASK(11, 8) +#define HIDI2C_CMD_3RD_BYTE GENMASK(23, 16) + +#define HIDI2C_HID_DESC_BCDVERSION 0x100 + +/** + * struct hidi2c_dev_descriptor - HIDI2C device descriptor definition + * @dev_desc_len: The length of the complete device descriptor, fixed to 0x1E (30). + * @bcd_ver: The version number of the HIDI2C protocol supported. + * In binary coded decimal (BCD) format. + * @report_desc_len: The length of the report descriptor + * @report_desc_reg: The register address to retrieve report descriptor + * @input_reg: the register address to retrieve input report + * @max_input_len: The length of the largest possible HID input (or feature) report + * @output_reg: the register address to send output report + * @max_output_len: The length of the largest output (or feature) report + * @cmd_reg: the register address to send command + * @data_reg: the register address to send command data + * @vendor_id: Device manufacturers vendor ID + * @product_id: Device unique model/product ID + * @version_id: Device’s unique version + * @reserved0: Reserved and should be 0 + * @reserved1: Reserved and should be 0 + */ +struct hidi2c_dev_descriptor { + __le16 dev_desc_len; + __le16 bcd_ver; + __le16 report_desc_len; + __le16 report_desc_reg; + __le16 input_reg; + __le16 max_input_len; + __le16 output_reg; + __le16 max_output_len; + __le16 cmd_reg; + __le16 data_reg; + __le16 vendor_id; + __le16 product_id; + __le16 version_id; + __le16 reserved0; + __le16 reserved1; +} __packed; + +#define HIDI2C_DEV_DESC_LEN sizeof(struct hidi2c_dev_descriptor) + +#endif /* _HID_OVER_I2C_H_ */ diff --git a/include/linux/hid-over-spi.h b/include/linux/hid-over-spi.h new file mode 100644 index 000000000000..da5a14b5e89b --- /dev/null +++ b/include/linux/hid-over-spi.h @@ -0,0 +1,155 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2024 Intel Corporation */ + +#ifndef _HID_OVER_SPI_H_ +#define _HID_OVER_SPI_H_ + +#include <linux/bits.h> +#include <linux/types.h> + +/* Input report type definition in HIDSPI protocol */ +enum input_report_type { + INVALID_INPUT_REPORT_TYPE_0 = 0, + DATA = 1, + INVALID_TYPE_2 = 2, + RESET_RESPONSE = 3, + COMMAND_RESPONSE = 4, + GET_FEATURE_RESPONSE = 5, + INVALID_TYPE_6 = 6, + DEVICE_DESCRIPTOR_RESPONSE = 7, + REPORT_DESCRIPTOR_RESPONSE = 8, + SET_FEATURE_RESPONSE = 9, + OUTPUT_REPORT_RESPONSE = 10, + GET_INPUT_REPORT_RESPONSE = 11, + INVALID_INPUT_REPORT_TYPE = 0xF, +}; + +/* Output report type definition in HIDSPI protocol */ +enum output_report_type { + INVALID_OUTPUT_REPORT_TYPE_0 = 0, + DEVICE_DESCRIPTOR = 1, + REPORT_DESCRIPTOR = 2, + SET_FEATURE = 3, + GET_FEATURE = 4, + OUTPUT_REPORT = 5, + GET_INPUT_REPORT = 6, + COMMAND_CONTENT = 7, +}; + +/* Set power command ID for output report */ +#define HIDSPI_SET_POWER_CMD_ID 1 + +/* Power state definition in HIDSPI protocol */ +enum hidspi_power_state { + HIDSPI_ON = 1, + HIDSPI_SLEEP = 2, + HIDSPI_OFF = 3, +}; + +/** + * Input report header definition in HIDSPI protocol + * Report header size is 32bits, it includes: + * protocol_ver: [0:3] Current supported HIDSPI protocol version, must be 0x3 + * reserved0: [4:7] Reserved bits + * input_report_len: [8:21] Input report length in number bytes divided by 4 + * last_frag_flag: [22]Indicate if this packet is last fragment. + * 1 - indicates last fragment + * 0 - indicates additional fragments + * reserved1: [23] Reserved bits + * @sync_const: [24:31] Used to validate input report header, must be 0x5A + */ +#define HIDSPI_INPUT_HEADER_SIZE sizeof(u32) +#define HIDSPI_INPUT_HEADER_VER GENMASK(3, 0) +#define HIDSPI_INPUT_HEADER_REPORT_LEN GENMASK(21, 8) +#define HIDSPI_INPUT_HEADER_LAST_FLAG BIT(22) +#define HIDSPI_INPUT_HEADER_SYNC GENMASK(31, 24) + +/** + * struct input_report_body_header - Input report body header definition in HIDSPI protocol + * @input_report_type: indicate input report type, reference to enum input_report_type + * @content_len: this input report body packet length + * @content_id: indicate this input report's report id + */ +struct input_report_body_header { + u8 input_report_type; + __le16 content_len; + u8 content_id; +} __packed; + +#define HIDSPI_INPUT_BODY_HEADER_SIZE sizeof(struct input_report_body_header) + +/** + * struct input_report_body - Input report body definition in HIDSPI protocol + * @body_hdr: input report body header + * @content: input report body content + */ +struct input_report_body { + struct input_report_body_header body_hdr; + u8 content[]; +} __packed; + +#define HIDSPI_INPUT_BODY_SIZE(content_len) ((content_len) + HIDSPI_INPUT_BODY_HEADER_SIZE) + +/** + * struct output_report_header - Output report header definition in HIDSPI protocol + * @report_type: output report type, reference to enum output_report_type + * @content_len: length of content + * @content_id: 0x00 - descriptors + * report id - Set/Feature feature or Input/Output Reports + * command opcode - for commands + */ +struct output_report_header { + u8 report_type; + __le16 content_len; + u8 content_id; +} __packed; + +#define HIDSPI_OUTPUT_REPORT_HEADER_SIZE sizeof(struct output_report_header) + +/** + * struct output_report - Output report definition in HIDSPI protocol + * @output_hdr: output report header + * @content: output report content + */ +struct output_report { + struct output_report_header output_hdr; + u8 content[]; +} __packed; + +#define HIDSPI_OUTPUT_REPORT_SIZE(content_len) ((content_len) + HIDSPI_OUTPUT_REPORT_HEADER_SIZE) + +/** + * struct hidspi_dev_descriptor - HIDSPI device descriptor definition + * @dev_desc_len: The length of the complete device descriptor, fixed to 0x18 (24). + * @bcd_ver: The version number of the HIDSPI protocol supported. + * In binary coded decimal (BCD) format. Must be fixed to 0x0300. + * @rep_desc_len: The length of the report descriptor + * @max_input_len: The length of the largest possible HID input (or feature) report + * @max_output_len: The length of the largest output (or feature) report + * @max_frag_len: The length of the largest fragment, where a fragment represents + * the body of an input report. + * @vendor_id: Device manufacturers vendor ID + * @product_id: Device unique model/product ID + * @version_id: Device’s unique version + * @flags: Specify flags for the device’s operation + * @reserved: Reserved and should be 0 + */ +struct hidspi_dev_descriptor { + __le16 dev_desc_len; + __le16 bcd_ver; + __le16 rep_desc_len; + __le16 max_input_len; + __le16 max_output_len; + __le16 max_frag_len; + __le16 vendor_id; + __le16 product_id; + __le16 version_id; + __le16 flags; + __le32 reserved; +}; + +#define HIDSPI_DEVICE_DESCRIPTOR_SIZE sizeof(struct hidspi_dev_descriptor) +#define HIDSPI_INPUT_DEVICE_DESCRIPTOR_SIZE \ + (HIDSPI_INPUT_BODY_HEADER_SIZE + HIDSPI_DEVICE_DESCRIPTOR_SIZE) + +#endif /* _HID_OVER_SPI_H_ */ diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index c27329e2a5ad..e71056553108 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -17,7 +17,7 @@ * @attrib_id: Attribute id for this attribute. * @report_id: Report id in which this information resides. * @index: Field index in the report. - * @units: Measurment unit for this attribute. + * @units: Measurement unit for this attribute. * @unit_expo: Exponent used in the data. * @size: Size in bytes for data size. * @logical_minimum: Logical minimum value for this attribute. @@ -39,8 +39,8 @@ struct hid_sensor_hub_attribute_info { * struct sensor_hub_pending - Synchronous read pending information * @status: Pending status true/false. * @ready: Completion synchronization data. - * @usage_id: Usage id for physical device, E.g. Gyro usage id. - * @attr_usage_id: Usage Id of a field, E.g. X-AXIS for a gyro. + * @usage_id: Usage id for physical device, e.g. gyro usage id. + * @attr_usage_id: Usage Id of a field, e.g. X-axis for a gyro. * @raw_size: Response size for a read request. * @raw_data: Place holder for received response. */ @@ -104,10 +104,10 @@ struct hid_sensor_hub_callbacks { int sensor_hub_device_open(struct hid_sensor_hub_device *hsdev); /** -* sensor_hub_device_clode() - Close hub device +* sensor_hub_device_close() - Close hub device * @hsdev: Hub device instance. * -* Used to clode hid device for sensor hub. +* Used to close hid device for sensor hub. */ void sensor_hub_device_close(struct hid_sensor_hub_device *hsdev); @@ -128,12 +128,13 @@ int sensor_hub_register_callback(struct hid_sensor_hub_device *hsdev, struct hid_sensor_hub_callbacks *usage_callback); /** -* sensor_hub_remove_callback() - Remove client callbacks +* sensor_hub_remove_callback() - Remove client callback * @hsdev: Hub device instance. -* @usage_id: Usage id of the client (E.g. 0x200076 for Gyro). +* @usage_id: Usage id of the client (e.g. 0x200076 for gyro). * -* If there is a callback registred, this call will remove that -* callbacks, so that it will stop data and event notifications. +* Removes a previously registered callback for the given usage_id +* and hsdev. Once removed, the client will no longer receive data or +* event notifications. */ int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev, u32 usage_id); diff --git a/include/linux/hid.h b/include/linux/hid.h index cdc0dc13c87f..7f260e0e2049 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -81,6 +81,8 @@ struct hid_item { #define HID_MAIN_ITEM_TAG_FEATURE 11 #define HID_MAIN_ITEM_TAG_BEGIN_COLLECTION 10 #define HID_MAIN_ITEM_TAG_END_COLLECTION 12 +#define HID_MAIN_ITEM_TAG_RESERVED_MIN 13 +#define HID_MAIN_ITEM_TAG_RESERVED_MAX 15 /* * HID report descriptor main item contents @@ -355,6 +357,7 @@ struct hid_item { * | @HID_QUIRK_INPUT_PER_APP: * | @HID_QUIRK_X_INVERT: * | @HID_QUIRK_Y_INVERT: + * | @HID_QUIRK_IGNORE_MOUSE: * | @HID_QUIRK_SKIP_OUTPUT_REPORTS: * | @HID_QUIRK_SKIP_OUTPUT_REPORT_ID: * | @HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP: @@ -380,6 +383,7 @@ struct hid_item { #define HID_QUIRK_INPUT_PER_APP BIT(11) #define HID_QUIRK_X_INVERT BIT(12) #define HID_QUIRK_Y_INVERT BIT(13) +#define HID_QUIRK_IGNORE_MOUSE BIT(14) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) @@ -738,8 +742,9 @@ struct hid_descriptor { __le16 bcdHID; __u8 bCountryCode; __u8 bNumDescriptors; + struct hid_class_descriptor rpt_desc; - struct hid_class_descriptor desc[1]; + struct hid_class_descriptor opt_descs[]; } __attribute__ ((packed)); #define HID_DEVICE(b, g, ven, prod) \ @@ -790,6 +795,8 @@ struct hid_usage_id { * @suspend: invoked on suspend (NULL means nop) * @resume: invoked on resume if device was not reset (NULL means nop) * @reset_resume: invoked on resume if device was reset (NULL means nop) + * @on_hid_hw_open: invoked when hid core opens first instance (NULL means nop) + * @on_hid_hw_close: invoked when hid core closes last instance (NULL means nop) * * probe should return -errno on error, or 0 on success. During probe, * input will not be passed to raw_event unless hid_device_io_start is @@ -845,6 +852,8 @@ struct hid_driver { int (*suspend)(struct hid_device *hdev, pm_message_t message); int (*resume)(struct hid_device *hdev); int (*reset_resume)(struct hid_device *hdev); + void (*on_hid_hw_open)(struct hid_device *hdev); + void (*on_hid_hw_close)(struct hid_device *hdev); /* private: */ struct device_driver driver; @@ -1222,12 +1231,6 @@ unsigned long hid_lookup_quirk(const struct hid_device *hdev); int hid_quirks_init(char **quirks_param, __u16 bus, int count); void hid_quirks_exit(__u16 bus); -#ifdef CONFIG_HID_PID -int hid_pidff_init(struct hid_device *hid); -#else -#define hid_pidff_init NULL -#endif - #define dbg_hid(fmt, ...) pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__) #define hid_err(hid, fmt, ...) \ @@ -1236,6 +1239,8 @@ int hid_pidff_init(struct hid_device *hid); dev_notice(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_warn(hid, fmt, ...) \ dev_warn(&(hid)->dev, fmt, ##__VA_ARGS__) +#define hid_warn_ratelimited(hid, fmt, ...) \ + dev_warn_ratelimited(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_info(hid, fmt, ...) \ dev_info(&(hid)->dev, fmt, ##__VA_ARGS__) #define hid_dbg(hid, fmt, ...) \ diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index dd100e849f5e..9a7683d79a4b 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -73,6 +73,14 @@ static inline void *kmap_local_page(struct page *page) return __kmap_local_page_prot(page, kmap_prot); } +static inline void *kmap_local_page_try_from_panic(struct page *page) +{ + if (!PageHighMem(page)) + return page_address(page); + /* If the page is in HighMem, it's not safe to kmap it.*/ + return NULL; +} + static inline void *kmap_local_folio(struct folio *folio, size_t offset) { struct page *page = folio_page(folio, offset / PAGE_SIZE); @@ -180,6 +188,11 @@ static inline void *kmap_local_page(struct page *page) return page_address(page); } +static inline void *kmap_local_page_try_from_panic(struct page *page) +{ + return page_address(page); +} + static inline void *kmap_local_folio(struct folio *folio, size_t offset) { return page_address(&folio->page) + offset; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90e..e48d7f27b0b9 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -404,6 +404,33 @@ static inline void memcpy_page(struct page *dst_page, size_t dst_off, kunmap_local(dst); } +static inline void memcpy_folio(struct folio *dst_folio, size_t dst_off, + struct folio *src_folio, size_t src_off, size_t len) +{ + VM_BUG_ON(dst_off + len > folio_size(dst_folio)); + VM_BUG_ON(src_off + len > folio_size(src_folio)); + + do { + char *dst = kmap_local_folio(dst_folio, dst_off); + const char *src = kmap_local_folio(src_folio, src_off); + size_t chunk = len; + + if (folio_test_highmem(dst_folio) && + chunk > PAGE_SIZE - offset_in_page(dst_off)) + chunk = PAGE_SIZE - offset_in_page(dst_off); + if (folio_test_highmem(src_folio) && + chunk > PAGE_SIZE - offset_in_page(src_off)) + chunk = PAGE_SIZE - offset_in_page(src_off); + memcpy(dst, src, chunk); + kunmap_local(src); + kunmap_local(dst); + + dst_off += chunk; + src_off += chunk; + len -= chunk; + } while (len > 0); +} + static inline void memset_page(struct page *page, size_t offset, int val, size_t len) { @@ -461,7 +488,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +516,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +549,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +587,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +624,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 6dbd0d49628f..99fcf65d575f 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -97,6 +97,8 @@ /* page number for queue file region */ #define QM_DOORBELL_PAGE_NR 1 +#define QM_DEV_ALG_MAX_LEN 256 + /* uacce mode of the driver */ #define UACCE_MODE_NOUACCE 0 /* don't use uacce */ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ @@ -122,6 +124,7 @@ enum qm_hw_ver { QM_HW_V1 = 0x20, QM_HW_V2 = 0x21, QM_HW_V3 = 0x30, + QM_HW_V4 = 0x50, }; enum qm_fun_type { @@ -156,6 +159,7 @@ enum qm_cap_bits { QM_SUPPORT_MB_COMMAND, QM_SUPPORT_SVA_PREFETCH, QM_SUPPORT_RPM, + QM_SUPPORT_DAE, }; struct qm_dev_alg { @@ -266,6 +270,8 @@ struct hisi_qm_err_ini { void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); enum acc_err_result (*get_err_result)(struct hisi_qm *qm); + bool (*dev_is_abnormal)(struct hisi_qm *qm); + int (*set_priv_status)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { @@ -392,6 +398,8 @@ struct hisi_qm { struct mutex mailbox_lock; + struct mutex ifc_lock; + const struct hisi_qm_hw_ops *ops; struct qm_debug debug; diff --git a/include/linux/hmm-dma.h b/include/linux/hmm-dma.h new file mode 100644 index 000000000000..f58b9fc71999 --- /dev/null +++ b/include/linux/hmm-dma.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ +#ifndef LINUX_HMM_DMA_H +#define LINUX_HMM_DMA_H + +#include <linux/dma-mapping.h> + +struct dma_iova_state; +struct pci_p2pdma_map_state; + +/* + * struct hmm_dma_map - array of PFNs and DMA addresses + * + * @state: DMA IOVA state + * @pfns: array of PFNs + * @dma_list: array of DMA addresses + * @dma_entry_size: size of each DMA entry in the array + */ +struct hmm_dma_map { + struct dma_iova_state state; + unsigned long *pfn_list; + dma_addr_t *dma_list; + size_t dma_entry_size; +}; + +int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map, + size_t nr_entries, size_t dma_entry_size); +void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map); +dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map, + size_t idx, + struct pci_p2pdma_map_state *p2pdma_state); +bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx); +#endif /* LINUX_HMM_DMA_H */ diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 126a36571667..db75ffc949a7 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -23,6 +23,10 @@ struct mmu_interval_notifier; * HMM_PFN_WRITE - if the page memory can be written to (requires HMM_PFN_VALID) * HMM_PFN_ERROR - accessing the pfn is impossible and the device should * fail. ie poisoned memory, special pages, no vma, etc + * HMM_PFN_P2PDMA - P2P page + * HMM_PFN_P2PDMA_BUS - Bus mapped P2P transfer + * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation + * to mark that page is already DMA mapped * * On input: * 0 - Return the current state of the page, do not fault it. @@ -36,13 +40,21 @@ enum hmm_pfn_flags { HMM_PFN_VALID = 1UL << (BITS_PER_LONG - 1), HMM_PFN_WRITE = 1UL << (BITS_PER_LONG - 2), HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3), - HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8), + /* + * Sticky flags, carried from input to output, + * don't forget to update HMM_PFN_INOUT_FLAGS + */ + HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 4), + HMM_PFN_P2PDMA = 1UL << (BITS_PER_LONG - 5), + HMM_PFN_P2PDMA_BUS = 1UL << (BITS_PER_LONG - 6), + + HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 11), /* Input flags */ HMM_PFN_REQ_FAULT = HMM_PFN_VALID, HMM_PFN_REQ_WRITE = HMM_PFN_WRITE, - HMM_PFN_FLAGS = 0xFFUL << HMM_PFN_ORDER_SHIFT, + HMM_PFN_FLAGS = ~((1UL << HMM_PFN_ORDER_SHIFT) - 1), }; /* @@ -58,6 +70,14 @@ static inline struct page *hmm_pfn_to_page(unsigned long hmm_pfn) } /* + * hmm_pfn_to_phys() - return physical address pointed to by a device entry + */ +static inline phys_addr_t hmm_pfn_to_phys(unsigned long hmm_pfn) +{ + return __pfn_to_phys(hmm_pfn & ~HMM_PFN_FLAGS); +} + +/* * hmm_pfn_to_map_order() - return the CPU mapping size order * * This is optionally useful to optimize processing of the pfn result diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f7bfdcf0dda3..1ef867bb8c44 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -223,11 +223,14 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer) } #endif +static inline enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused) +{ + return HRTIMER_NORESTART; +} + /* Exported timer functions: */ /* Initialize timers: */ -extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), clockid_t clock_id, enum hrtimer_mode mode); extern void hrtimer_setup_on_stack(struct hrtimer *timer, @@ -333,6 +336,7 @@ static inline int hrtimer_callback_running(struct hrtimer *timer) static inline void hrtimer_update_function(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *)) { +#ifdef CONFIG_PROVE_LOCKING guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock); if (WARN_ON_ONCE(hrtimer_is_queued(timer))) @@ -340,8 +344,8 @@ static inline void hrtimer_update_function(struct hrtimer *timer, if (WARN_ON_ONCE(!function)) return; - - timer->function = function; +#endif + ACCESS_PRIVATE(timer, function) = function; } /* Forward a hrtimer so it expires after now: */ diff --git a/include/linux/hrtimer_types.h b/include/linux/hrtimer_types.h index ad66a3081735..8fbbb6bdf7a1 100644 --- a/include/linux/hrtimer_types.h +++ b/include/linux/hrtimer_types.h @@ -34,12 +34,12 @@ enum hrtimer_restart { * @is_hard: Set if hrtimer will be expired in hard interrupt context * even on RT. * - * The hrtimer structure must be initialized by hrtimer_init() + * The hrtimer structure must be initialized by hrtimer_setup() */ struct hrtimer { struct timerqueue_node node; ktime_t _softexpires; - enum hrtimer_restart (*function)(struct hrtimer *); + enum hrtimer_restart (*__private function)(struct hrtimer *); struct hrtimer_clock_base *base; u8 state; u8 is_rel; diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b94c2e8ee918..2f190c90192d 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -39,6 +39,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio, + bool write); +vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio, + bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_UNSUPPORTED, @@ -121,6 +125,8 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, MTHP_STAT_ZSWPOUT, MTHP_STAT_SWPIN, + MTHP_STAT_SWPIN_FALLBACK, + MTHP_STAT_SWPIN_FALLBACK_CHARGE, MTHP_STAT_SWPOUT, MTHP_STAT_SWPOUT_FALLBACK, MTHP_STAT_SHMEM_ALLOC, @@ -339,6 +345,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, unsigned int new_order); int min_order_for_split(struct folio *folio); int split_folio_to_list(struct folio *folio, struct list_head *list); +bool uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, + bool warns); +int folio_split(struct folio *folio, unsigned int new_order, struct page *page, + struct list_head *list); +/* + * try_folio_split - try to split a @folio at @page using non uniform split. + * @folio: folio to be split + * @page: split to order-0 at the given page + * @list: store the after-split folios + * + * Try to split a @folio at @page using non uniform split to order-0, if + * non uniform split is not supported, fall back to uniform split. + * + * Return: 0: split is successful, otherwise split failed. + */ +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + int ret = min_order_for_split(folio); + + if (ret < 0) + return ret; + + if (!non_uniform_split_supported(folio, 0, false)) + return split_huge_page_to_list_to_order(&folio->page, list, + ret); + return folio_split(folio, ret, page, list); +} static inline int split_huge_page(struct page *page) { struct folio *folio = page_folio(page); @@ -359,7 +395,7 @@ static inline int split_huge_page(struct page *page) void deferred_split_folio(struct folio *folio, bool partially_mapped); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct folio *folio); + unsigned long address, bool freeze); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ @@ -367,12 +403,11 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ - false, NULL); \ + false); \ } while (0) - void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, - bool freeze, struct folio *folio); + bool freeze); void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, unsigned long address); @@ -402,7 +437,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end); void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, - unsigned long end, long adjust_next); + unsigned long end, struct vm_area_struct *next); spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); @@ -459,15 +494,13 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) struct folio *mm_get_huge_zero_folio(struct mm_struct *mm); void mm_put_huge_zero_folio(struct mm_struct *mm); -#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) - static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmd, bool freeze, struct folio *folio); + pmd_t *pmd, bool freeze); bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, struct folio *folio); @@ -531,17 +564,23 @@ static inline int split_folio_to_list(struct folio *folio, struct list_head *lis return 0; } +static inline int try_folio_split(struct folio *folio, struct page *page, + struct list_head *list) +{ + return 0; +} + static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address, bool freeze, struct folio *folio) {} + unsigned long address, bool freeze) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, - unsigned long address, bool freeze, struct folio *folio) {} + unsigned long address, bool freeze) {} static inline void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, - bool freeze, struct folio *folio) {} + bool freeze) {} static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp, @@ -569,7 +608,7 @@ static inline int madvise_collapse(struct vm_area_struct *vma, static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, - long adjust_next) + struct vm_area_struct *next) { } static inline int is_swap_pmd(pmd_t pmd) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ae4fe8615bb6..42f374e828a2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -14,6 +14,7 @@ #include <linux/pgtable.h> #include <linux/gfp.h> #include <linux/userfaultfd_k.h> +#include <linux/nodemask.h> struct ctl_table; struct user_struct; @@ -128,12 +129,12 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma, int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *, struct vm_area_struct *); void unmap_hugepage_range(struct vm_area_struct *, - unsigned long, unsigned long, struct page *, - zap_flags_t); + unsigned long start, unsigned long end, + struct folio *, zap_flags_t); void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, - struct page *ref_page, zap_flags_t zap_flags); + struct folio *, zap_flags_t zap_flags); void hugetlb_report_meminfo(struct seq_file *); int hugetlb_report_node_meminfo(char *buf, int len, int nid); void hugetlb_show_meminfo_node(int nid); @@ -153,11 +154,11 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -bool isolate_hugetlb(struct folio *folio, struct list_head *list); +bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); -void folio_putback_active_hugetlb(struct folio *folio); +void folio_putback_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void hugetlb_fix_reserve_counts(struct inode *inode); extern struct mutex *hugetlb_fault_mutex_table; @@ -174,6 +175,11 @@ struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio); extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages[MAX_NUMNODES]; +void hugetlb_bootmem_alloc(void); +bool hugetlb_bootmem_allocated(void); +extern nodemask_t hugetlb_bootmem_nodes; +void hugetlb_bootmem_set_nodes(void); + /* arch callbacks */ #ifndef CONFIG_HIGHPTE @@ -272,6 +278,8 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); +void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); #else /* !CONFIG_HUGETLB_PAGE */ @@ -414,7 +422,7 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list) +static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list) { return false; } @@ -430,7 +438,7 @@ static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, return 0; } -static inline void folio_putback_active_hugetlb(struct folio *folio) +static inline void folio_putback_hugetlb(struct folio *folio) { } @@ -449,7 +457,7 @@ static inline long hugetlb_change_protection( static inline void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, - unsigned long end, struct page *ref_page, + unsigned long end, struct folio *folio, zap_flags_t zap_flags) { BUG(); @@ -465,6 +473,12 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + +static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write @@ -588,6 +602,7 @@ enum hugetlb_page_flags { HPG_freed, HPG_vmemmap_optimized, HPG_raw_hwp_unreliable, + HPG_cma, __NR_HPAGEFLAGS, }; @@ -647,6 +662,7 @@ HPAGEFLAG(Temporary, temporary) HPAGEFLAG(Freed, freed) HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) +HPAGEFLAG(Cma, cma) #ifdef CONFIG_HUGETLB_PAGE @@ -675,14 +691,26 @@ struct hstate { char name[HSTATE_NAME_LEN]; }; +struct cma; + struct huge_bootmem_page { struct list_head list; struct hstate *hstate; + unsigned long flags; + struct cma *cma; }; -int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); +#define HUGE_BOOTMEM_HVO 0x0001 +#define HUGE_BOOTMEM_ZONES_VALID 0x0002 +#define HUGE_BOOTMEM_CMA 0x0004 + +bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m); + +int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list); +int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn); +void wait_for_freed_hugetlb_folios(void); struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, - unsigned long addr, int avoid_reserve); + unsigned long addr, bool cow_from_owner); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask, bool allow_alloc_fallback); @@ -813,6 +841,17 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, } #endif +#ifndef arch_has_huge_bootmem_alloc +/* + * Some architectures do their own bootmem allocation, so they can't use + * early CMA allocation. + */ +static inline bool arch_has_huge_bootmem_alloc(void) +{ + return false; +} +#endif + static inline struct hstate *folio_hstate(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); @@ -1003,7 +1042,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } #endif @@ -1053,15 +1094,25 @@ static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, return NULL; } -static inline int isolate_or_dissolve_huge_page(struct page *page, +static inline int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list) { return -ENOMEM; } +static inline int replace_free_hugepage_folios(unsigned long start_pfn, + unsigned long end_pfn) +{ + return 0; +} + +static inline void wait_for_freed_hugetlb_folios(void) +{ +} + static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, - int avoid_reserve) + bool cow_from_owner) { return NULL; } @@ -1243,6 +1294,15 @@ static inline bool hugetlbfs_pagecache_present( { return false; } + +static inline void hugetlb_bootmem_alloc(void) +{ +} + +static inline bool hugetlb_bootmem_allocated(void) +{ + return false; +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h new file mode 100644 index 000000000000..1bc2b3244613 --- /dev/null +++ b/include/linux/hung_task.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Detect Hung Task: detecting tasks stuck in D state + * + * Copyright (C) 2025 Tongcheng Travel (www.ly.com) + * Author: Lance Yang <mingzhe.yang@ly.com> + */ +#ifndef __LINUX_HUNG_TASK_H +#define __LINUX_HUNG_TASK_H + +#include <linux/bug.h> +#include <linux/sched.h> +#include <linux/compiler.h> + +/* + * @blocker: Combines lock address and blocking type. + * + * Since lock pointers are at least 4-byte aligned(32-bit) or 8-byte + * aligned(64-bit). This leaves the 2 least bits (LSBs) of the pointer + * always zero. So we can use these bits to encode the specific blocking + * type. + * + * Type encoding: + * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) + * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) + * 10 - Blocked on rt-mutex (BLOCKER_TYPE_RTMUTEX) + * 11 - Blocked on rw-semaphore (BLOCKER_TYPE_RWSEM) + */ +#define BLOCKER_TYPE_MUTEX 0x00UL +#define BLOCKER_TYPE_SEM 0x01UL +#define BLOCKER_TYPE_RTMUTEX 0x02UL +#define BLOCKER_TYPE_RWSEM 0x03UL + +#define BLOCKER_TYPE_MASK 0x03UL + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +static inline void hung_task_set_blocker(void *lock, unsigned long type) +{ + unsigned long lock_ptr = (unsigned long)lock; + + WARN_ON_ONCE(!lock_ptr); + WARN_ON_ONCE(READ_ONCE(current->blocker)); + + /* + * If the lock pointer matches the BLOCKER_TYPE_MASK, return + * without writing anything. + */ + if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) + return; + + WRITE_ONCE(current->blocker, lock_ptr | type); +} + +static inline void hung_task_clear_blocker(void) +{ + WARN_ON_ONCE(!READ_ONCE(current->blocker)); + + WRITE_ONCE(current->blocker, 0UL); +} + +/* + * hung_task_get_blocker_type - Extracts blocker type from encoded blocker + * address. + * + * @blocker: Blocker pointer with encoded type (via LSB bits) + * + * Returns: BLOCKER_TYPE_MUTEX, BLOCKER_TYPE_SEM, etc. + */ +static inline unsigned long hung_task_get_blocker_type(unsigned long blocker) +{ + WARN_ON_ONCE(!blocker); + + return blocker & BLOCKER_TYPE_MASK; +} + +static inline void *hung_task_blocker_to_lock(unsigned long blocker) +{ + WARN_ON_ONCE(!blocker); + + return (void *)(blocker & ~BLOCKER_TYPE_MASK); +} +#else +static inline void hung_task_set_blocker(void *lock, unsigned long type) +{ +} +static inline void hung_task_clear_blocker(void) +{ +} +static inline unsigned long hung_task_get_blocker_type(unsigned long blocker) +{ + return 0UL; +} +static inline void *hung_task_blocker_to_lock(unsigned long blocker) +{ + return NULL; +} +#endif + +#endif /* __LINUX_HUNG_TASK_H */ diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index f0231dbc4777..f35b42e8c5de 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -58,11 +58,9 @@ struct hwspinlock_pdata { int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev, const struct hwspinlock_ops *ops, int base_id, int num_locks); int hwspin_lock_unregister(struct hwspinlock_device *bank); -struct hwspinlock *hwspin_lock_request(void); struct hwspinlock *hwspin_lock_request_specific(unsigned int id); int hwspin_lock_free(struct hwspinlock *hwlock); int of_hwspin_lock_get_id(struct device_node *np, int index); -int hwspin_lock_get_id(struct hwspinlock *hwlock); int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, unsigned long *); int __hwspin_trylock(struct hwspinlock *, int, unsigned long *); @@ -70,7 +68,6 @@ void __hwspin_unlock(struct hwspinlock *, int, unsigned long *); int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name); int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id); int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock); -struct hwspinlock *devm_hwspin_lock_request(struct device *dev); struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev, unsigned int id); int devm_hwspin_lock_unregister(struct device *dev, @@ -95,11 +92,6 @@ int devm_hwspin_lock_register(struct device *dev, * Note: ERR_PTR(-ENODEV) will still be considered a success for NULL-checking * users. Others, which care, can still check this with IS_ERR. */ -static inline struct hwspinlock *hwspin_lock_request(void) -{ - return ERR_PTR(-ENODEV); -} - static inline struct hwspinlock *hwspin_lock_request_specific(unsigned int id) { return ERR_PTR(-ENODEV); @@ -138,11 +130,6 @@ static inline int of_hwspin_lock_get_id(struct device_node *np, int index) return 0; } -static inline int hwspin_lock_get_id(struct hwspinlock *hwlock) -{ - return 0; -} - static inline int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name) { @@ -155,11 +142,6 @@ int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock) return 0; } -static inline struct hwspinlock *devm_hwspin_lock_request(struct device *dev) -{ - return ERR_PTR(-ENODEV); -} - static inline struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev, unsigned int id) diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 02a226bcf0ed..a59c5c3e95fb 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -24,7 +24,7 @@ #include <linux/mod_devicetable.h> #include <linux/interrupt.h> #include <linux/reciprocal_div.h> -#include <asm/hyperv-tlfs.h> +#include <hyperv/hvhdk.h> #define MAX_PAGE_BUFFER_COUNT 32 #define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */ @@ -371,19 +371,6 @@ struct vmtransfer_page_packet_header { struct vmtransfer_page_range ranges[]; } __packed; -struct vmgpadl_packet_header { - struct vmpacket_descriptor d; - u32 gpadl; - u32 reserved; -} __packed; - -struct vmadd_remove_transfer_page_set { - struct vmpacket_descriptor d; - u32 gpadl; - u16 xfer_pageset_id; - u16 reserved; -} __packed; - /* * This structure defines a range in guest physical space that can be made to * look virtually contiguous. @@ -395,30 +382,6 @@ struct gpa_range { }; /* - * This is the format for an Establish Gpadl packet, which contains a handle by - * which this GPADL will be known and a set of GPA ranges associated with it. - * This can be converted to a MDL by the guest OS. If there are multiple GPA - * ranges, then the resulting MDL will be "chained," representing multiple VA - * ranges. - */ -struct vmestablish_gpadl { - struct vmpacket_descriptor d; - u32 gpadl; - u32 range_cnt; - struct gpa_range range[1]; -} __packed; - -/* - * This is the format for a Teardown Gpadl packet, which indicates that the - * GPADL handle in the Establish Gpadl packet will never be referenced again. - */ -struct vmteardown_gpadl { - struct vmpacket_descriptor d; - u32 gpadl; - u32 reserved; /* for alignment to a 8-byte boundary */ -} __packed; - -/* * This is the format for a GPA-Direct packet, which contains a set of GPA * ranges, in addition to commands and/or data. */ @@ -429,25 +392,6 @@ struct vmdata_gpa_direct { struct gpa_range range[1]; } __packed; -/* This is the format for a Additional Data Packet. */ -struct vmadditional_data { - struct vmpacket_descriptor d; - u64 total_bytes; - u32 offset; - u32 byte_cnt; - unsigned char data[1]; -} __packed; - -union vmpacket_largest_possible_header { - struct vmpacket_descriptor simple_hdr; - struct vmtransfer_page_packet_header xfer_page_hdr; - struct vmgpadl_packet_header gpadl_hdr; - struct vmadd_remove_transfer_page_set add_rm_xfer_page_hdr; - struct vmestablish_gpadl establish_gpadl_hdr; - struct vmteardown_gpadl teardown_gpadl_hdr; - struct vmdata_gpa_direct data_gpa_direct_hdr; -}; - #define VMPACKET_DATA_START_ADDRESS(__packet) \ (void *)(((unsigned char *)__packet) + \ ((struct vmpacket_descriptor)__packet)->offset8 * 8) @@ -768,15 +712,6 @@ struct vmbus_close_msg { struct vmbus_channel_close_channel msg; }; -/* Define connection identifier type. */ -union hv_connection_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u; -}; - enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -1067,6 +1002,12 @@ struct vmbus_channel { /* The max size of a packet on this channel */ u32 max_pkt_size; + + /* function to mmap ring buffer memory to the channel's sysfs ring attribute */ + int (*mmap_ring_buffer)(struct vmbus_channel *channel, struct vm_area_struct *vma); + + /* boolean to control visibility of sysfs for ring buffer */ + bool ring_sysfs_visible; }; #define lock_requestor(channel, flags) \ @@ -1226,13 +1167,6 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel, enum vmbus_packet_type type, u32 flags); -extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, - struct hv_page_buffer pagebuffers[], - u32 pagecount, - void *buffer, - u32 bufferlen, - u64 requestid); - extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct vmbus_packet_mpb_array *mpb, u32 desc_size, @@ -1342,6 +1276,8 @@ static inline void *hv_get_drvdata(struct hv_device *dev) return dev_get_drvdata(&dev->device); } +struct device *hv_get_vmbus_root_device(void); + struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; u32 current_read_index; @@ -1670,6 +1606,7 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id, const guid_t *shv_host_servie_id); int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp); void vmbus_set_event(struct vmbus_channel *channel); +int vmbus_channel_set_cpu(struct vmbus_channel *channel, u32 target_cpu); /* Get the start of the ring buffer. */ static inline void * diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h index 9efbc54e35e5..be5417303ecf 100644 --- a/include/linux/hypervisor.h +++ b/include/linux/hypervisor.h @@ -37,6 +37,9 @@ static inline bool hypervisor_isolated_pci_functions(void) if (IS_ENABLED(CONFIG_S390)) return true; + if (IS_ENABLED(CONFIG_LOONGARCH)) + return true; + return jailhouse_paravirt(); } diff --git a/include/linux/i2c-atr.h b/include/linux/i2c-atr.h index 4d5da161c225..2bb54dc87c8e 100644 --- a/include/linux/i2c-atr.h +++ b/include/linux/i2c-atr.h @@ -19,21 +19,59 @@ struct fwnode_handle; struct i2c_atr; /** + * enum i2c_atr_flags - Flags for an I2C ATR driver + * + * @I2C_ATR_F_STATIC: ATR does not support dynamic mapping, use static mapping. + * Mappings will only be added or removed as a result of + * devices being added or removed from a child bus. + * The ATR pool will have to be big enough to accomodate all + * devices expected to be added to the child buses. + * @I2C_ATR_F_PASSTHROUGH: Allow unmapped incoming addresses to pass through + */ +enum i2c_atr_flags { + I2C_ATR_F_STATIC = BIT(0), + I2C_ATR_F_PASSTHROUGH = BIT(1), +}; + +/** * struct i2c_atr_ops - Callbacks from ATR to the device driver. - * @attach_client: Notify the driver of a new device connected on a child - * bus, with the alias assigned to it. The driver must - * configure the hardware to use the alias. - * @detach_client: Notify the driver of a device getting disconnected. The - * driver must configure the hardware to stop using the - * alias. + * @attach_addr: Notify the driver of a new device connected on a child + * bus, with the alias assigned to it. The driver must + * configure the hardware to use the alias. + * @detach_addr: Notify the driver of a device getting disconnected. The + * driver must configure the hardware to stop using the + * alias. * * All these functions return 0 on success, a negative error code otherwise. */ struct i2c_atr_ops { - int (*attach_client)(struct i2c_atr *atr, u32 chan_id, - const struct i2c_client *client, u16 alias); - void (*detach_client)(struct i2c_atr *atr, u32 chan_id, - const struct i2c_client *client); + int (*attach_addr)(struct i2c_atr *atr, u32 chan_id, + u16 addr, u16 alias); + void (*detach_addr)(struct i2c_atr *atr, u32 chan_id, + u16 addr); +}; + +/** + * struct i2c_atr_adap_desc - An ATR downstream bus descriptor + * @chan_id: Index of the new adapter (0 .. max_adapters-1). This value is + * passed to the callbacks in `struct i2c_atr_ops`. + * @parent: The device used as the parent of the new i2c adapter, or NULL + * to use the i2c-atr device as the parent. + * @bus_handle: The fwnode handle that points to the adapter's i2c + * peripherals, or NULL. + * @num_aliases: The number of aliases in this adapter's private alias pool. Set + * to zero if this adapter uses the ATR's global alias pool. + * @aliases: An optional array of private aliases used by the adapter + * instead of the ATR's global pool of aliases. Must contain + * exactly num_aliases entries if num_aliases > 0, is ignored + * otherwise. + */ +struct i2c_atr_adap_desc { + u32 chan_id; + struct device *parent; + struct fwnode_handle *bus_handle; + size_t num_aliases; + u16 *aliases; }; /** @@ -42,6 +80,7 @@ struct i2c_atr_ops { * @dev: The device acting as an ATR * @ops: Driver-specific callbacks * @max_adapters: Maximum number of child adapters + * @flags: Flags for ATR * * The new ATR helper is connected to the parent adapter but has no child * adapters. Call i2c_atr_add_adapter() to add some. @@ -51,7 +90,8 @@ struct i2c_atr_ops { * Return: pointer to the new ATR helper object, or ERR_PTR */ struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev, - const struct i2c_atr_ops *ops, int max_adapters); + const struct i2c_atr_ops *ops, int max_adapters, + u32 flags); /** * i2c_atr_delete - Delete an I2C ATR helper. @@ -65,12 +105,7 @@ void i2c_atr_delete(struct i2c_atr *atr); /** * i2c_atr_add_adapter - Create a child ("downstream") I2C bus. * @atr: The I2C ATR - * @chan_id: Index of the new adapter (0 .. max_adapters-1). This value is - * passed to the callbacks in `struct i2c_atr_ops`. - * @adapter_parent: The device used as the parent of the new i2c adapter, or NULL - * to use the i2c-atr device as the parent. - * @bus_handle: The fwnode handle that points to the adapter's i2c - * peripherals, or NULL. + * @desc: An ATR adapter descriptor * * After calling this function a new i2c bus will appear. Adding and removing * devices on the downstream bus will result in calls to the @@ -85,9 +120,7 @@ void i2c_atr_delete(struct i2c_atr *atr); * * Return: 0 on success, a negative error code otherwise. */ -int i2c_atr_add_adapter(struct i2c_atr *atr, u32 chan_id, - struct device *adapter_parent, - struct fwnode_handle *bus_handle); +int i2c_atr_add_adapter(struct i2c_atr *atr, struct i2c_atr_adap_desc *desc); /** * i2c_atr_del_adapter - Remove a child ("downstream") I2C bus added by diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index ced1c6ead52a..dc1bd2ab4c13 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -44,9 +44,11 @@ static inline void i2c_free_slave_host_notify_device(struct i2c_client *client) #endif #if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI) -void i2c_register_spd(struct i2c_adapter *adap); +void i2c_register_spd_write_disable(struct i2c_adapter *adap); +void i2c_register_spd_write_enable(struct i2c_adapter *adap); #else -static inline void i2c_register_spd(struct i2c_adapter *adap) { } +static inline void i2c_register_spd_write_disable(struct i2c_adapter *adap) { } +static inline void i2c_register_spd_write_enable(struct i2c_adapter *adap) { } #endif #endif /* _LINUX_I2C_SMBUS_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 388ce71a29a9..20fd41b51d5c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -321,6 +321,8 @@ struct i2c_driver { * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources * acquired when probing this device. + * @debugfs: pointer to the debugfs subdirectory which the I2C core created + * for this client. * * An i2c_client identifies a single device (i.e. chip) connected to an * i2c bus. The behaviour exposed to Linux is defined by the driver @@ -350,6 +352,7 @@ struct i2c_client { i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif void *devres_group_id; /* ID of probe devres group */ + struct dentry *debugfs; /* per-client debugfs dir */ }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) @@ -402,7 +405,6 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * @addr: stored in i2c_client.addr * @dev_name: Overrides the default <busnr>-<addr> dev_name if set * @platform_data: stored in i2c_client.dev.platform_data - * @of_node: pointer to OpenFirmware device node * @fwnode: device node supplied by the platform firmware * @swnode: software node for the device * @resources: resources associated with the device @@ -426,7 +428,6 @@ struct i2c_board_info { unsigned short addr; const char *dev_name; void *platform_data; - struct device_node *of_node; struct fwnode_handle *fwnode; const struct software_node *swnode; const struct resource *resources; @@ -949,6 +950,21 @@ static inline u8 i2c_8bit_addr_from_msg(const struct i2c_msg *msg) return (msg->addr << 1) | (msg->flags & I2C_M_RD); } +/* + * 10-bit address + * addr_1: 5'b11110 | addr[9:8] | (R/nW) + * addr_2: addr[7:0] + */ +static inline u8 i2c_10bit_addr_hi_from_msg(const struct i2c_msg *msg) +{ + return 0xf0 | ((msg->addr & GENMASK(9, 8)) >> 7) | (msg->flags & I2C_M_RD); +} + +static inline u8 i2c_10bit_addr_lo_from_msg(const struct i2c_msg *msg) +{ + return msg->addr & GENMASK(7, 0); +} + u8 *i2c_get_dma_safe_msg_buf(struct i2c_msg *msg, unsigned int threshold); void i2c_put_dma_safe_msg_buf(u8 *buf, struct i2c_msg *msg, bool xferred); @@ -1026,10 +1042,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node return i2c_get_adapter_by_fwnode(of_fwnode_handle(node)); } -const struct of_device_id -*i2c_of_match_device(const struct of_device_id *matches, - struct i2c_client *client); - int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info); @@ -1050,13 +1062,6 @@ static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node return NULL; } -static inline const struct of_device_id -*i2c_of_match_device(const struct of_device_id *matches, - struct i2c_client *client) -{ - return NULL; -} - static inline int of_i2c_get_board_info(struct device *dev, struct device_node *node, struct i2c_board_info *info) diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 0a8a44ac2f02..7f136de4b73e 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -245,7 +245,7 @@ void i3c_driver_unregister(struct i3c_driver *drv); * * Return: 0 if both registrations succeeds, a negative error code otherwise. */ -static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, +static __always_inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { int ret; @@ -270,7 +270,7 @@ static inline int i3c_i2c_driver_register(struct i3c_driver *i3cdrv, * Note that when CONFIG_I3C is not enabled, this function only unregisters the * @i2cdrv. */ -static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, +static __always_inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, struct i2c_driver *i2cdrv) { if (IS_ENABLED(CONFIG_I3C)) @@ -283,7 +283,7 @@ static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv, * module_i3c_i2c_driver() - Register a module providing an I3C and an I2C * driver * @__i3cdrv: the I3C driver to register - * @__i2cdrv: the I3C driver to register + * @__i2cdrv: the I2C driver to register * * Provide generic init/exit functions that simply register/unregister an I3C * and an I2C driver. diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 12d532b012c5..c67922ece617 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -475,7 +475,7 @@ struct i3c_master_controller_ops { int (*attach_i2c_dev)(struct i2c_dev_desc *dev); void (*detach_i2c_dev)(struct i2c_dev_desc *dev); int (*i2c_xfers)(struct i2c_dev_desc *dev, - const struct i2c_msg *xfers, int nxfers); + struct i2c_msg *xfers, int nxfers); int (*request_ibi)(struct i3c_dev_desc *dev, const struct i3c_ibi_setup *req); void (*free_ibi)(struct i3c_dev_desc *dev); diff --git a/include/linux/i8042.h b/include/linux/i8042.h index 95b07f8b77fe..00037c13abc8 100644 --- a/include/linux/i8042.h +++ b/include/linux/i8042.h @@ -54,15 +54,29 @@ struct serio; +/** + * typedef i8042_filter_t - i8042 filter callback + * @data: Data received by the i8042 controller + * @str: Status register of the i8042 controller + * @serio: Serio of the i8042 controller + * @context: Context pointer associated with this callback + * + * This represents a i8042 filter callback which can be used with i8042_install_filter() + * and i8042_remove_filter() to filter the i8042 input for platform-specific key codes. + * + * Context: Interrupt context. + * Returns: true if the data should be filtered out, false if otherwise. + */ +typedef bool (*i8042_filter_t)(unsigned char data, unsigned char str, struct serio *serio, + void *context); + #if defined(CONFIG_SERIO_I8042) || defined(CONFIG_SERIO_I8042_MODULE) void i8042_lock_chip(void); void i8042_unlock_chip(void); int i8042_command(unsigned char *param, int command); -int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)); -int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)); +int i8042_install_filter(i8042_filter_t filter, void *context); +int i8042_remove_filter(i8042_filter_t filter); #else @@ -79,14 +93,12 @@ static inline int i8042_command(unsigned char *param, int command) return -ENODEV; } -static inline int i8042_install_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)) +static inline int i8042_install_filter(i8042_filter_t filter, void *context) { return -ENODEV; } -static inline int i8042_remove_filter(bool (*filter)(unsigned char data, unsigned char str, - struct serio *serio)) +static inline int i8042_remove_filter(i8042_filter_t filter) { return -ENODEV; } diff --git a/include/linux/idr.h b/include/linux/idr.h index da5f5fa4a3a6..2267902d29a7 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -15,6 +15,7 @@ #include <linux/radix-tree.h> #include <linux/gfp.h> #include <linux/percpu.h> +#include <linux/cleanup.h> struct idr { struct radix_tree_root idr_rt; @@ -124,6 +125,22 @@ void *idr_get_next_ul(struct idr *, unsigned long *nextid); void *idr_replace(struct idr *, void *, unsigned long id); void idr_destroy(struct idr *); +struct __class_idr { + struct idr *idr; + int id; +}; + +#define idr_null ((struct __class_idr){ NULL, -1 }) +#define take_idr_id(id) __get_and_null(id, idr_null) + +DEFINE_CLASS(idr_alloc, struct __class_idr, + if (_T.id >= 0) idr_remove(_T.idr, _T.id), + ((struct __class_idr){ + .idr = idr, + .id = idr_alloc(idr, ptr, start, end, gfp), + }), + struct idr *idr, void *ptr, int start, int end, gfp_t gfp); + /** * idr_init_base() - Initialise an IDR. * @idr: IDR handle. @@ -257,6 +274,7 @@ struct ida { int ida_alloc_range(struct ida *, unsigned int min, unsigned int max, gfp_t); void ida_free(struct ida *, unsigned int id); void ida_destroy(struct ida *ida); +int ida_find_first_range(struct ida *ida, unsigned int min, unsigned int max); /** * ida_alloc() - Allocate an unused ID. @@ -328,4 +346,14 @@ static inline bool ida_is_empty(const struct ida *ida) { return xa_empty(&ida->xa); } + +static inline bool ida_exists(struct ida *ida, unsigned int id) +{ + return ida_find_first_range(ida, id, id) == id; +} + +static inline int ida_find_first(struct ida *ida) +{ + return ida_find_first_range(ida, 0, ~0); +} #endif /* __IDR_H__ */ diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a96db2915aab..996be3c2cff0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -111,6 +111,8 @@ /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 +#define IEEE80211_S1G_BCN_CSSID 0x200 +#define IEEE80211_S1G_BCN_ANO 0x400 /* see 802.11ah-2016 9.9 NDP CMAC frames */ #define IEEE80211_S1G_1MHZ_NDP_BITS 25 @@ -153,9 +155,6 @@ #define IEEE80211_ANO_NETTYPE_WILD 15 -/* bits unique to S1G beacon */ -#define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 - /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 @@ -628,18 +627,42 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) } /** - * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * ieee80211_s1g_has_next_tbtt - check if IEEE80211_S1G_BCN_NEXT_TBTT * @fc: frame control bytes in little-endian byteorder - * Return: whether or not the frame is an S1G short beacon, - * i.e. it is an S1G beacon with 'next TBTT' flag set + * Return: whether or not the frame contains the variable-length + * next TBTT field */ -static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) +static inline bool ieee80211_s1g_has_next_tbtt(__le16 fc) { return ieee80211_is_s1g_beacon(fc) && (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); } /** + * ieee80211_s1g_has_ano - check if IEEE80211_S1G_BCN_ANO + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * ANO field + */ +static inline bool ieee80211_s1g_has_ano(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_ANO)); +} + +/** + * ieee80211_s1g_has_cssid - check if IEEE80211_S1G_BCN_CSSID + * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame contains the variable-length + * compressed SSID field + */ +static inline bool ieee80211_s1g_has_cssid(__le16 fc) +{ + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_CSSID)); +} + +/** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ATIM frame @@ -1243,18 +1266,42 @@ struct ieee80211_ext { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; - u8 variable[0]; + u8 variable[]; } __packed s1g_beacon; - struct { - u8 sa[ETH_ALEN]; - __le32 timestamp; - u8 change_seq; - u8 next_tbtt[3]; - u8 variable[0]; - } __packed s1g_short_beacon; } u; } __packed __aligned(2); +/** + * ieee80211_s1g_optional_len - determine length of optional S1G beacon fields + * @fc: frame control bytes in little-endian byteorder + * Return: total length in bytes of the optional fixed-length fields + * + * S1G beacons may contain up to three optional fixed-length fields that + * precede the variable-length elements. Whether these fields are present + * is indicated by flags in the frame control field. + * + * From IEEE 802.11-2024 section 9.3.4.3: + * - Next TBTT field may be 0 or 3 bytes + * - Short SSID field may be 0 or 4 bytes + * - Access Network Options (ANO) field may be 0 or 1 byte + */ +static inline size_t +ieee80211_s1g_optional_len(__le16 fc) +{ + size_t len = 0; + + if (ieee80211_s1g_has_next_tbtt(fc)) + len += 3; + + if (ieee80211_s1g_has_cssid(fc)) + len += 4; + + if (ieee80211_s1g_has_ano(fc)) + len += 1; + + return len; +} + #define IEEE80211_TWT_CONTROL_NDP BIT(0) #define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1) #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3) @@ -1477,7 +1524,7 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed tdls_discover_resp; struct { u8 action_code; @@ -1526,12 +1573,27 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { u8 action_code; } __packed ttlm_tear_down; + struct { + u8 action_code; + u8 dialog_token; + u8 variable[]; + } __packed ml_reconf_req; + struct { + u8 action_code; + u8 dialog_token; + u8 count; + u8 variable[]; + } __packed ml_reconf_resp; + struct { + u8 action_code; + u8 variable[]; + } __packed epcs; } u; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ @@ -1542,11 +1604,13 @@ struct ieee80211_mgmt { #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 #define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 #define BSS_MEMBERSHIP_SELECTOR_GLK 125 -#define BSS_MEMBERSHIP_SELECTOR_EPS 124 +#define BSS_MEMBERSHIP_SELECTOR_EPD 124 #define BSS_MEMBERSHIP_SELECTOR_SAE_H2E 123 #define BSS_MEMBERSHIP_SELECTOR_HE_PHY 122 #define BSS_MEMBERSHIP_SELECTOR_EHT_PHY 121 +#define BSS_MEMBERSHIP_SELECTOR_MIN BSS_MEMBERSHIP_SELECTOR_EHT_PHY + /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) @@ -1645,35 +1709,35 @@ struct ieee80211_tdls_data { struct { u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_req; struct { __le16 status_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_resp; struct { __le16 status_code; u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed setup_cfm; struct { __le16 reason_code; - u8 variable[0]; + u8 variable[]; } __packed teardown; struct { u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed discover_req; struct { u8 target_channel; u8 oper_class; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_req; struct { __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_resp; } u; } __packed; @@ -2308,6 +2372,7 @@ struct ieee80211_eht_cap_elem { #define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION 0x04 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT 0x08 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK 0x30 +#define IEEE80211_EHT_OPER_MCS15_DISABLE 0x40 /** * struct ieee80211_eht_operation - eht operation element @@ -3096,6 +3161,11 @@ ieee80211_he_spr_size(const u8 *he_spr_ie) #define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454 2 #define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK 0x01 +#define IEEE80211_EHT_MAC_CAP1_EHT_TRS 0x02 +#define IEEE80211_EHT_MAC_CAP1_TXOP_RET 0x04 +#define IEEE80211_EHT_MAC_CAP1_TWO_BQRS 0x08 +#define IEEE80211_EHT_MAC_CAP1_EHT_LINK_ADAPT_MASK 0x30 +#define IEEE80211_EHT_MAC_CAP1_UNSOL_EPCS_PRIO_ACCESS 0x40 /* EHT PHY capabilities as defined in P802.11be_D2.0 section 9.4.2.313.3 */ #define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 @@ -3883,6 +3953,16 @@ enum ieee80211_protected_eht_actioncode { WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0, WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1, WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2, + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_REQ = 3, + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP = 4, + WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN = 5, + WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF = 6, + WLAN_PROTECTED_EHT_ACTION_LINK_RECOMMEND = 7, + WLAN_PROTECTED_EHT_ACTION_ML_OP_UPDATE_REQ = 8, + WLAN_PROTECTED_EHT_ACTION_ML_OP_UPDATE_RESP = 9, + WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_NOTIF = 10, + WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ = 11, + WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP = 12, }; /* Security key length */ @@ -4055,6 +4135,9 @@ enum ieee80211_tdls_actioncode { /* Defines support for enhanced multi-bssid advertisement*/ #define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(3) +/* Enable Beacon Protection */ +#define WLAN_EXT_CAPA11_BCN_PROTECT BIT(4) + /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 @@ -4806,6 +4889,39 @@ static inline bool ieee80211_is_ftm(struct sk_buff *skb) return false; } +/** + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon + * @fc: frame control bytes in little-endian byteorder + * @variable: pointer to the beacon frame elements + * @variable_len: length of the frame elements + * Return: whether or not the frame is an S1G short beacon. As per + * IEEE80211-2024 11.1.3.10.1, The S1G beacon compatibility element shall + * always be present as the first element in beacon frames generated at a + * TBTT (Target Beacon Transmission Time), so any frame not containing + * this element must have been generated at a TSBTT (Target Short Beacon + * Transmission Time) that is not a TBTT. Additionally, short beacons are + * prohibited from containing the S1G beacon compatibility element as per + * IEEE80211-2024 9.3.4.3 Table 9-76, so if we have an S1G beacon with + * either no elements or the first element is not the beacon compatibility + * element, we have a short beacon. + */ +static inline bool ieee80211_is_s1g_short_beacon(__le16 fc, const u8 *variable, + size_t variable_len) +{ + if (!ieee80211_is_s1g_beacon(fc)) + return false; + + /* + * If the frame does not contain at least 1 element (this is perfectly + * valid in a short beacon) and is an S1G beacon, we have a short + * beacon. + */ + if (variable_len < 2) + return true; + + return variable[0] != WLAN_EID_S1G_BCN_COMPAT; +} + struct element { u8 id; u8 datalen; @@ -4961,6 +5077,7 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLC_BASIC_PRES_EML_CAPA 0x0080 #define IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP 0x0100 #define IEEE80211_MLC_BASIC_PRES_MLD_ID 0x0200 +#define IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP 0x0400 #define IEEE80211_MED_SYNC_DELAY_DURATION 0x00ff #define IEEE80211_MED_SYNC_DELAY_SYNC_OFDM_ED_THRESH 0x0f00 @@ -5018,6 +5135,8 @@ struct ieee80211_multi_link_elem { #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 +#define IEEE80211_MLD_CAP_OP_LINK_RECONF_SUPPORT 0x2000 +#define IEEE80211_MLD_CAP_OP_ALIGNED_TWT_SUPPORT 0x4000 struct ieee80211_mle_basic_common_info { u8 len; @@ -5033,6 +5152,9 @@ struct ieee80211_mle_preq_common_info { } __packed; #define IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR 0x0010 +#define IEEE80211_MLC_RECONF_PRES_EML_CAPA 0x0020 +#define IEEE80211_MLC_RECONF_PRES_MLD_CAPA_OP 0x0040 +#define IEEE80211_MLC_RECONF_PRES_EXT_MLD_CAPA_OP 0x0080 /* no fixed fields in RECONF */ @@ -5223,6 +5345,47 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) } /** + * ieee80211_mle_get_ext_mld_capa_op - returns the extended MLD capabilities + * and operations. + * @data: pointer to the multi-link element + * Return: the extended MLD capabilities and operations field value from + * the multi-link element, or 0 if not present + * + * The element is assumed to be of the correct type (BASIC) and big enough, + * this must be checked using ieee80211_mle_type_ok(). + */ +static inline u16 ieee80211_mle_get_ext_mld_capa_op(const u8 *data) +{ + const struct ieee80211_multi_link_elem *mle = (const void *)data; + u16 control = le16_to_cpu(mle->control); + const u8 *common = mle->variable; + + /* + * common points now at the beginning of + * ieee80211_mle_basic_common_info + */ + common += sizeof(struct ieee80211_mle_basic_common_info); + + if (!(control & IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP)) + return 0; + + if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) + common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) + common += 2; + if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) + common += 1; + + return get_unaligned_le16(common); +} + +/** * ieee80211_mle_get_mld_id - returns the MLD ID * @data: pointer to the multi-link element * Return: The MLD ID in the given multi-link element, or 0 if not present @@ -5294,6 +5457,8 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) common += 1; + if (control & IEEE80211_MLC_BASIC_PRES_EXT_MLD_CAPA_OP) + common += 2; break; case IEEE80211_ML_CONTROL_TYPE_PREQ: common += sizeof(struct ieee80211_mle_preq_common_info); @@ -5304,6 +5469,12 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) case IEEE80211_ML_CONTROL_TYPE_RECONF: if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) common += ETH_ALEN; + if (control & IEEE80211_MLC_RECONF_PRES_EML_CAPA) + common += 2; + if (control & IEEE80211_MLC_RECONF_PRES_MLD_CAPA_OP) + common += 2; + if (control & IEEE80211_MLC_RECONF_PRES_EXT_MLD_CAPA_OP) + common += 2; break; case IEEE80211_ML_CONTROL_TYPE_TDLS: common += sizeof(struct ieee80211_mle_tdls_common_info); @@ -5453,8 +5624,13 @@ ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta #define IEEE80211_MLE_STA_RECONF_CONTROL_COMPLETE_PROFILE 0x0010 #define IEEE80211_MLE_STA_RECONF_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 #define IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT 0x0040 -#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_UPDATE_TYPE 0x0780 -#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT 0x0800 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE 0x0780 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_AP_REM 0 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_OP_PARAM_UPDATE 1 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_ADD_LINK 2 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_DEL_LINK 3 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_TYPE_NSTR_STATUS 4 +#define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT 0x0800 /** * ieee80211_mle_reconf_sta_prof_size_ok - validate reconfiguration multi-link @@ -5487,6 +5663,9 @@ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, fixed + prof->sta_info_len - 1 <= len; } +#define IEEE80211_MLE_STA_EPCS_CONTROL_LINK_ID 0x000f +#define IEEE80211_EPCS_ENA_RESP_BODY_LEN 3 + static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) { const struct ieee80211_ttlm_elem *t2l = (const void *)data; @@ -5520,6 +5699,80 @@ static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) return len >= fixed + elem_len; } +/** + * ieee80211_emlsr_pad_delay_in_us - Fetch the EMLSR Padding delay + * in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Padding delay (in microseconds) encoded in the + * EML Capabilities field + */ + +static inline u32 ieee80211_emlsr_pad_delay_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417i—Encoding of the EMLSR + * Padding Delay subfield. + */ + u32 pad_delay = u16_get_bits(eml_cap, + IEEE80211_EML_CAP_EMLSR_PADDING_DELAY); + + if (!pad_delay || + pad_delay > IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US) + return 0; + + return 32 * (1 << (pad_delay - 1)); +} + +/** + * ieee80211_emlsr_trans_delay_in_us - Fetch the EMLSR Transition + * delay in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Transition delay (in microseconds) encoded in the + * EML Capabilities field + */ + +static inline u32 ieee80211_emlsr_trans_delay_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417j—Encoding of the EMLSR + * Transition Delay subfield. + */ + u32 trans_delay = + u16_get_bits(eml_cap, + IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY); + + /* invalid values also just use 0 */ + if (!trans_delay || + trans_delay > IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US) + return 0; + + return 16 * (1 << (trans_delay - 1)); +} + +/** + * ieee80211_eml_trans_timeout_in_us - Fetch the EMLSR Transition + * timeout value in microseconds + * @eml_cap: EML capabilities field value from common info field of + * the Multi-link element + * Return: the EMLSR Transition timeout (in microseconds) encoded in + * the EML Capabilities field + */ + +static inline u32 ieee80211_eml_trans_timeout_in_us(u16 eml_cap) +{ + /* IEEE Std 802.11be-2024 Table 9-417m—Encoding of the + * Transition Timeout subfield. + */ + u8 timeout = u16_get_bits(eml_cap, + IEEE80211_EML_CAP_TRANSITION_TIMEOUT); + + /* invalid values also just use 0 */ + if (!timeout || timeout > IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU) + return 0; + + return 128 * (1 << (timeout - 1)); +} + #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 3ff96ae31bf6..c5fe3b2a53e8 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -65,11 +65,9 @@ struct br_ip_list { #define BR_DEFAULT_AGEING_TIME (300 * HZ) struct net_bridge; -void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br, - unsigned int cmd, struct ifreq *ifr, +void brioctl_set(int (*hook)(struct net *net, unsigned int cmd, void __user *uarg)); -int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd, - struct ifreq *ifr, void __user *uarg); +int br_ioctl_call(struct net *net, unsigned int cmd, void __user *uarg); #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 8a9792a6427a..61b7335aa037 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -19,6 +19,9 @@ #include <linux/skbuff.h> #include <uapi/linux/if_ether.h> +/* XX:XX:XX:XX:XX:XX */ +#define MAC_ADDR_STR_LEN (3 * ETH_ALEN - 1) + static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_mac_header(skb); diff --git a/include/linux/if_hsr.h b/include/linux/if_hsr.h index 0404f5bf4f30..d7941fd88032 100644 --- a/include/linux/if_hsr.h +++ b/include/linux/if_hsr.h @@ -13,6 +13,15 @@ enum hsr_version { PRP_V1, }; +enum hsr_port_type { + HSR_PT_NONE = 0, /* Must be 0, used by framereg */ + HSR_PT_SLAVE_A, + HSR_PT_SLAVE_B, + HSR_PT_INTERLINK, + HSR_PT_MASTER, + HSR_PT_PORTS, /* This must be the last item in the enum */ +}; + /* HSR Tag. * As defined in IEC-62439-3:2010, the HSR tag is really { ethertype = 0x88FB, * path, LSDU_size, sequence Nr }. But we let eth_header() create { h_dest, @@ -32,6 +41,8 @@ struct hsr_tag { #if IS_ENABLED(CONFIG_HSR) extern bool is_hsr_master(struct net_device *dev); extern int hsr_get_version(struct net_device *dev, enum hsr_version *ver); +struct net_device *hsr_get_port_ndev(struct net_device *ndev, + enum hsr_port_type pt); #else static inline bool is_hsr_master(struct net_device *dev) { @@ -42,6 +53,12 @@ static inline int hsr_get_version(struct net_device *dev, { return -EINVAL; } + +static inline struct net_device *hsr_get_port_ndev(struct net_device *ndev, + enum hsr_port_type pt) +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_HSR */ #endif /*_LINUX_IF_HSR_H_*/ diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 523025106a64..0f7281e3e448 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -59,8 +59,10 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, extern void macvlan_common_setup(struct net_device *dev); -extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], +struct rtnl_newlink_params; + +extern int macvlan_common_newlink(struct net_device *dev, + struct rtnl_newlink_params *params, struct netlink_ext_ack *extack); extern void macvlan_dellink(struct net_device *dev, struct list_head *head); diff --git a/include/linux/if_team.h b/include/linux/if_team.h index cdc684e04a2f..ce97d891cf72 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -191,8 +191,6 @@ struct team { const struct header_ops *header_ops_cache; - struct mutex lock; /* used for overall locking, e.g. port lists write */ - /* * List of enabled ports and their count */ @@ -223,7 +221,6 @@ struct team { atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; - struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index d65b5d71b93b..b9f699799cf6 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -79,11 +79,6 @@ static inline struct vlan_ethhdr *skb_vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline bool is_vlan_dev(const struct net_device *dev) -{ - return dev->priv_flags & IFF_802_1Q_VLAN; -} - #define skb_vlan_tag_present(__skb) (!!(__skb)->vlan_all) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) @@ -176,6 +171,7 @@ struct netpoll; * @real_dev_addr: address of underlying netdevice * @dent: proc dir entry * @vlan_pcpu_stats: ptr to percpu rx stats + * @netpoll: netpoll instance "propagated" down to @real_dev */ struct vlan_dev_priv { unsigned int nr_ingress_mappings; @@ -199,6 +195,11 @@ struct vlan_dev_priv { #endif }; +static inline bool is_vlan_dev(const struct net_device *dev) +{ + return dev->priv_flags & IFF_802_1Q_VLAN; +} + static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) { return netdev_priv(dev); @@ -236,6 +237,11 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, extern bool vlan_uses_dev(const struct net_device *dev); #else +static inline bool is_vlan_dev(const struct net_device *dev) +{ + return false; +} + static inline struct net_device * __vlan_find_dev_deep_rcu(struct net_device *real_dev, __be16 vlan_proto, u16 vlan_id) @@ -253,19 +259,19 @@ vlan_for_each(struct net_device *dev, static inline struct net_device *vlan_dev_real_dev(const struct net_device *dev) { - BUG(); + WARN_ON_ONCE(1); return NULL; } static inline u16 vlan_dev_vlan_id(const struct net_device *dev) { - BUG(); + WARN_ON_ONCE(1); return 0; } static inline __be16 vlan_dev_vlan_proto(const struct net_device *dev) { - BUG(); + WARN_ON_ONCE(1); return 0; } @@ -310,7 +316,7 @@ static inline bool vlan_uses_dev(const struct net_device *dev) * eth_type_vlan - check for valid vlan ether type. * @ethertype: ether type to check * - * Returns true if the ether type is a vlan ether type. + * Returns: true if the ether type is a vlan ether type. */ static inline bool eth_type_vlan(__be16 ethertype) { @@ -341,9 +347,9 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. */ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, @@ -390,9 +396,9 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. */ static inline int __vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) @@ -414,6 +420,8 @@ static inline int __vlan_insert_tag(struct sk_buff *skb, * doesn't have to worry about freeing the original skb. * * Does not change skb->protocol so this function can be used during receive. + * + * Return: modified @skb on success, NULL on error (@skb is freed). */ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, @@ -443,6 +451,8 @@ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb, * doesn't have to worry about freeing the original skb. * * Does not change skb->protocol so this function can be used during receive. + * + * Return: modified @skb on success, NULL on error (@skb is freed). */ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) @@ -461,6 +471,8 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb, * * Following the skb_unshare() example, in case of error, the calling function * doesn't have to worry about freeing the original skb. + * + * Return: modified @skb on success, NULL on error (@skb is freed). */ static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb, __be16 vlan_proto, @@ -533,7 +545,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not of VLAN type + * Returns: error if the skb is not of VLAN type */ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -551,7 +563,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if @skb->vlan_tci is not set correctly + * Returns: error if @skb->vlan_tci is not set correctly */ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) @@ -570,7 +582,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not VLAN tagged + * Returns: error if the skb is not VLAN tagged */ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -582,13 +594,13 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) } /** - * vlan_get_protocol - get protocol EtherType. + * __vlan_get_protocol_offset() - get protocol EtherType. * @skb: skbuff to query * @type: first vlan protocol * @mac_offset: MAC offset * @depth: buffer to store length of eth and vlan tags in bytes * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 __vlan_get_protocol_offset(const struct sk_buff *skb, @@ -639,7 +651,7 @@ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) @@ -720,7 +732,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, * Expects the skb to contain a VLAN tag in the payload, and to have skb->data * pointing at the MAC header. * - * Returns a new pointer to skb->data, or NULL on failure to pull. + * Returns: a new pointer to skb->data, or NULL on failure to pull. */ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) { @@ -737,7 +749,7 @@ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) * skb_vlan_tagged - check if skb is vlan tagged. * @skb: skbuff to query * - * Returns true if the skb is tagged, regardless of whether it is hardware + * Returns: true if the skb is tagged, regardless of whether it is hardware * accelerated or not. */ static inline bool skb_vlan_tagged(const struct sk_buff *skb) @@ -753,7 +765,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb) * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. * @skb: skbuff to query * - * Returns true if the skb is tagged with multiple vlan headers, regardless + * Returns: true if the skb is tagged with multiple vlan headers, regardless * of whether it is hardware accelerated or not. */ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) @@ -784,7 +796,7 @@ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) * @skb: skbuff to query * @features: features to be checked * - * Returns features without unsafe ones if the skb has multiple tags. + * Returns: features without unsafe ones if the skb has multiple tags. */ static inline netdev_features_t vlan_features_check(struct sk_buff *skb, netdev_features_t features) @@ -808,9 +820,11 @@ static inline netdev_features_t vlan_features_check(struct sk_buff *skb, * @h1: Pointer to vlan header * @h2: Pointer to vlan header * - * Compare two vlan headers, returns 0 if equal. + * Compare two vlan headers. * * Please note that alignment of h1 & h2 are only guaranteed to be 16 bits. + * + * Return: 0 if equal, arbitrary non-zero value if not equal. */ static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1, const struct vlan_hdr *h2) diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 5171231f70a8..073b30a9b850 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -87,6 +87,8 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; + unsigned long mca_cstamp; + unsigned long mca_tstamp; struct rcu_head rcu; }; diff --git a/include/linux/iio/adc-helpers.h b/include/linux/iio/adc-helpers.h new file mode 100644 index 000000000000..56b092a2a4c4 --- /dev/null +++ b/include/linux/iio/adc-helpers.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * The industrial I/O ADC firmware property parsing helpers + * + * Copyright (c) 2025 Matti Vaittinen <mazziesaccount@gmail.com> + */ + +#ifndef _INDUSTRIAL_IO_ADC_HELPERS_H_ +#define _INDUSTRIAL_IO_ADC_HELPERS_H_ + +#include <linux/property.h> + +struct device; +struct iio_chan_spec; + +static inline int iio_adc_device_num_channels(struct device *dev) +{ + return device_get_named_child_node_count(dev, "channel"); +} + +int devm_iio_adc_device_alloc_chaninfo_se(struct device *dev, + const struct iio_chan_spec *template, + int max_chan_id, + struct iio_chan_spec **cs); + +#endif /* _INDUSTRIAL_IO_ADC_HELPERS_H_ */ diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index f8c1d2505940..f242b285081b 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -29,6 +29,7 @@ struct ad_sd_calib_data { struct ad_sigma_delta; struct device; +struct gpio_desc; struct iio_dev; /** @@ -45,6 +46,7 @@ struct iio_dev; * modify or drop the sample data, it, may be NULL. * @has_registers: true if the device has writable and readable registers, false * if there is just one read-only sample data shift register. + * @has_named_irqs: Set to true if there is more than one IRQ line. * @addr_shift: Shift of the register address in the communications register. * @read_mask: Mask for the communications register having the read bit set. * @status_ch_mask: Mask for the channel number stored in status register. @@ -52,7 +54,7 @@ struct iio_dev; * be used. * @irq_flags: flags for the interrupt used by the triggered buffer * @num_slots: Number of sequencer slots - * @irq_line: IRQ for reading conversions. If 0, spi->irq will be used + * @num_resetclks: Number of SPI clk cycles with MOSI=1 to reset the chip. */ struct ad_sigma_delta_info { int (*set_channel)(struct ad_sigma_delta *, unsigned int channel); @@ -62,13 +64,14 @@ struct ad_sigma_delta_info { int (*disable_one)(struct ad_sigma_delta *, unsigned int chan); int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample); bool has_registers; + bool has_named_irqs; unsigned int addr_shift; unsigned int read_mask; unsigned int status_ch_mask; unsigned int data_reg; unsigned long irq_flags; unsigned int num_slots; - int irq_line; + unsigned int num_resetclks; }; /** @@ -85,6 +88,7 @@ struct ad_sigma_delta { /* private: */ struct completion completion; + spinlock_t irq_lock; /* protects .irq_dis and irq en/disable state */ bool irq_dis; bool bus_locked; @@ -96,7 +100,8 @@ struct ad_sigma_delta { unsigned int active_slots; unsigned int current_slot; unsigned int num_slots; - int irq_line; + struct gpio_desc *rdy_gpiod; + int irq_line; bool status_appended; /* map slots to channels in order to know what to expect from devices */ unsigned int *slots; @@ -178,8 +183,7 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int *val); -int ad_sd_reset(struct ad_sigma_delta *sigma_delta, - unsigned int reset_length); +int ad_sd_reset(struct ad_sigma_delta *sigma_delta); int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val); diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h index 10be00f3b120..e59d909cb659 100644 --- a/include/linux/iio/backend.h +++ b/include/linux/iio/backend.h @@ -70,6 +70,12 @@ enum iio_backend_sample_trigger { IIO_BACKEND_SAMPLE_TRIGGER_MAX }; +enum iio_backend_interface_type { + IIO_BACKEND_INTERFACE_SERIAL_LVDS, + IIO_BACKEND_INTERFACE_SERIAL_CMOS, + IIO_BACKEND_INTERFACE_MAX +}; + /** * struct iio_backend_ops - operations structure for an iio_backend * @enable: Enable backend. @@ -78,6 +84,7 @@ enum iio_backend_sample_trigger { * @chan_disable: Disable one channel. * @data_format_set: Configure the data format for a specific channel. * @data_source_set: Configure the data source for a specific channel. + * @data_source_get: Data source getter for a specific channel. * @set_sample_rate: Configure the sampling rate for a specific channel. * @test_pattern_set: Configure a test pattern. * @chan_status: Get the channel status. @@ -88,6 +95,9 @@ enum iio_backend_sample_trigger { * @extend_chan_spec: Extend an IIO channel. * @ext_info_set: Extended info setter. * @ext_info_get: Extended info getter. + * @interface_type_get: Interface type. + * @data_size_set: Data size. + * @oversampling_ratio_set: Set Oversampling ratio. * @read_raw: Read a channel attribute from a backend device * @debugfs_print_chan_status: Print channel status into a buffer. * @debugfs_reg_access: Read or write register value of backend. @@ -106,6 +116,8 @@ struct iio_backend_ops { const struct iio_backend_data_fmt *data); int (*data_source_set)(struct iio_backend *back, unsigned int chan, enum iio_backend_data_source data); + int (*data_source_get)(struct iio_backend *back, unsigned int chan, + enum iio_backend_data_source *data); int (*set_sample_rate)(struct iio_backend *back, unsigned int chan, u64 sample_rate_hz); int (*test_pattern_set)(struct iio_backend *back, @@ -128,6 +140,11 @@ struct iio_backend_ops { const char *buf, size_t len); int (*ext_info_get)(struct iio_backend *back, uintptr_t private, const struct iio_chan_spec *chan, char *buf); + int (*interface_type_get)(struct iio_backend *back, + enum iio_backend_interface_type *type); + int (*data_size_set)(struct iio_backend *back, unsigned int size); + int (*oversampling_ratio_set)(struct iio_backend *back, + unsigned int ratio); int (*read_raw)(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); @@ -162,6 +179,8 @@ int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan, const struct iio_backend_data_fmt *data); int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan, enum iio_backend_data_source data); +int iio_backend_data_source_get(struct iio_backend *back, unsigned int chan, + enum iio_backend_data_source *data); int iio_backend_set_sampling_freq(struct iio_backend *back, unsigned int chan, u64 sample_rate_hz); int iio_backend_test_pattern_set(struct iio_backend *back, @@ -186,6 +205,11 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private, const char *buf, size_t len); ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private, const struct iio_chan_spec *chan, char *buf); +int iio_backend_interface_type_get(struct iio_backend *back, + enum iio_backend_interface_type *type); +int iio_backend_data_size_set(struct iio_backend *back, unsigned int size); +int iio_backend_oversampling_ratio_set(struct iio_backend *back, + unsigned int ratio); int iio_backend_read_raw(struct iio_backend *back, struct iio_chan_spec const *chan, int *val, int *val2, long mask); diff --git a/include/linux/iio/buffer-dmaengine.h b/include/linux/iio/buffer-dmaengine.h index 81d9a19aeb91..37f27545f69f 100644 --- a/include/linux/iio/buffer-dmaengine.h +++ b/include/linux/iio/buffer-dmaengine.h @@ -11,8 +11,9 @@ struct iio_dev; struct device; +struct dma_chan; -void iio_dmaengine_buffer_free(struct iio_buffer *buffer); +void iio_dmaengine_buffer_teardown(struct iio_buffer *buffer); struct iio_buffer *iio_dmaengine_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const char *channel, @@ -26,6 +27,10 @@ int devm_iio_dmaengine_buffer_setup_ext(struct device *dev, struct iio_dev *indio_dev, const char *channel, enum iio_buffer_direction dir); +int devm_iio_dmaengine_buffer_setup_with_handle(struct device *dev, + struct iio_dev *indio_dev, + struct dma_chan *chan, + enum iio_buffer_direction dir); #define devm_iio_dmaengine_buffer_setup(dev, indio_dev, channel) \ devm_iio_dmaengine_buffer_setup_ext(dev, indio_dev, channel, \ diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h index 418b1307d3f2..5c84ec4a9810 100644 --- a/include/linux/iio/buffer.h +++ b/include/linux/iio/buffer.h @@ -37,7 +37,7 @@ int iio_pop_from_buffer(struct iio_buffer *buffer, void *data); static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, void *data, int64_t timestamp) { - if (indio_dev->scan_timestamp) { + if (ACCESS_PRIVATE(indio_dev, scan_timestamp)) { size_t ts_offset = indio_dev->scan_bytes / sizeof(int64_t) - 1; ((int64_t *)data)[ts_offset] = timestamp; } @@ -45,6 +45,18 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev, return iio_push_to_buffers(indio_dev, data); } +static inline int iio_push_to_buffers_with_ts(struct iio_dev *indio_dev, + void *data, size_t data_total_len, + s64 timestamp) +{ + if (unlikely(data_total_len < indio_dev->scan_bytes)) { + dev_err(&indio_dev->dev, "Undersized storage pushed to buffer\n"); + return -ENOSPC; + } + + return iio_push_to_buffers_with_timestamp(indio_dev, data, timestamp); +} + int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev, const void *data, size_t data_sz, int64_t timestamp); diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 333d1d8ccb37..6a4479616479 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -418,7 +418,7 @@ unsigned int iio_get_channel_ext_info_count(struct iio_channel *chan); * @chan: The channel being queried. * @attr: The ext_info attribute to read. * @buf: Where to store the attribute value. Assumed to hold - * at least PAGE_SIZE bytes. + * at least PAGE_SIZE bytes and to be aligned at PAGE_SIZE. * * Returns the number of bytes written to buf (perhaps w/o zero termination; * it need not even be a string), or an error code. @@ -445,7 +445,7 @@ ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr, * iio_read_channel_label() - read label for a given channel * @chan: The channel being queried. * @buf: Where to store the attribute value. Assumed to hold - * at least PAGE_SIZE bytes. + * at least PAGE_SIZE bytes and to be aligned at PAGE_SIZE. * * Returns the number of bytes written to buf, or an error code. */ diff --git a/include/linux/iio/iio-gts-helper.h b/include/linux/iio/iio-gts-helper.h index 9cb6c80dea71..66f830ab9b49 100644 --- a/include/linux/iio/iio-gts-helper.h +++ b/include/linux/iio/iio-gts-helper.h @@ -188,6 +188,9 @@ int iio_gts_total_gain_to_scale(struct iio_gts *gts, int total_gain, int iio_gts_find_gain_sel_for_scale_using_time(struct iio_gts *gts, int time_sel, int scale_int, int scale_nano, int *gain_sel); +int iio_gts_find_gain_time_sel_for_scale(struct iio_gts *gts, int scale_int, + int scale_nano, int *gain_sel, + int *time_sel); int iio_gts_get_scale(struct iio_gts *gts, int gain, int time, int *scale_int, int *scale_nano); int iio_gts_find_new_gain_sel_by_old_gain_time(struct iio_gts *gts, @@ -196,11 +199,15 @@ int iio_gts_find_new_gain_sel_by_old_gain_time(struct iio_gts *gts, int iio_gts_find_new_gain_by_old_gain_time(struct iio_gts *gts, int old_gain, int old_time, int new_time, int *new_gain); +int iio_gts_find_new_gain_by_gain_time_min(struct iio_gts *gts, int old_gain, + int old_time, int new_time, + int *new_gain, bool *in_range); int iio_gts_avail_times(struct iio_gts *gts, const int **vals, int *type, int *length); int iio_gts_all_avail_scales(struct iio_gts *gts, const int **vals, int *type, int *length); int iio_gts_avail_scales_for_time(struct iio_gts *gts, int time, const int **vals, int *type, int *length); +int iio_gts_get_total_gain(struct iio_gts *gts, int gain, int time); #endif diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index a89e7e43e441..4247497f3f8b 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -28,7 +28,7 @@ * @groupcounter: index of next attribute group * @legacy_scan_el_group: attribute group for legacy scan elements attribute group * @legacy_buffer_group: attribute group for legacy buffer attributes group - * @bounce_buffer: for devices that call iio_push_to_buffers_with_timestamp_unaligned() + * @bounce_buffer: for devices that call iio_push_to_buffers_with_ts_unaligned() * @bounce_buffer_size: size of currently allocate bounce buffer * @scan_index_timestamp: cache of the index to the timestamp * @clock_id: timestamping clock posix identifier diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index ae65890d4567..d11668f14a3e 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -7,9 +7,11 @@ #ifndef _INDUSTRIAL_IO_H_ #define _INDUSTRIAL_IO_H_ +#include <linux/align.h> #include <linux/device.h> #include <linux/cdev.h> -#include <linux/cleanup.h> +#include <linux/compiler_types.h> +#include <linux/minmax.h> #include <linux/slab.h> #include <linux/iio/types.h> /* IIO TODO LIST */ @@ -611,7 +613,7 @@ struct iio_dev { const unsigned long *available_scan_masks; unsigned int __private masklength; const unsigned long *active_scan_mask; - bool scan_timestamp; + bool __private scan_timestamp; struct iio_trigger *trig; struct iio_poll_func *pollfunc; struct iio_poll_func *pollfunc_event; @@ -659,34 +661,31 @@ void iio_device_unregister(struct iio_dev *indio_dev); int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, struct module *this_mod); int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); -int iio_device_claim_direct_mode(struct iio_dev *indio_dev); -void iio_device_release_direct_mode(struct iio_dev *indio_dev); +bool __iio_device_claim_direct(struct iio_dev *indio_dev); +void __iio_device_release_direct(struct iio_dev *indio_dev); /* - * This autocleanup logic is normally used via - * iio_device_claim_direct_scoped(). + * Helper functions that allow claim and release of direct mode + * in a fashion that doesn't generate many false positives from sparse. + * Note this must remain static inline in the header so that sparse + * can see the __acquire() marking. Revisit when sparse supports + * __cond_acquires() */ -DEFINE_GUARD(iio_claim_direct, struct iio_dev *, iio_device_claim_direct_mode(_T), - iio_device_release_direct_mode(_T)) +static inline bool iio_device_claim_direct(struct iio_dev *indio_dev) +{ + if (!__iio_device_claim_direct(indio_dev)) + return false; -DEFINE_GUARD_COND(iio_claim_direct, _try, ({ - struct iio_dev *dev; - int d = iio_device_claim_direct_mode(_T); + __acquire(iio_dev); - if (d < 0) - dev = NULL; - else - dev = _T; - dev; - })) + return true; +} -/** - * iio_device_claim_direct_scoped() - Scoped call to iio_device_claim_direct. - * @fail: What to do on failure to claim device. - * @iio_dev: Pointer to the IIO devices structure - */ -#define iio_device_claim_direct_scoped(fail, iio_dev) \ - scoped_cond_guard(iio_claim_direct_try, fail, iio_dev) +static inline void iio_device_release_direct(struct iio_dev *indio_dev) +{ + __iio_device_release_direct(indio_dev); + __release(indio_dev); +} int iio_device_claim_buffer_mode(struct iio_dev *indio_dev); void iio_device_release_buffer_mode(struct iio_dev *indio_dev); @@ -778,8 +777,45 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) * to in turn include IIO_DMA_MINALIGN'd elements such as buffers which * must not share cachelines with the rest of the structure, thus making * them safe for use with non-coherent DMA. + * + * A number of drivers also use this on buffers that include a 64-bit timestamp + * that is used with iio_push_to_buffer_with_ts(). Therefore, in the case where + * DMA alignment is not sufficient for proper timestamp alignment, we align to + * 8 bytes instead. */ -#define IIO_DMA_MINALIGN ARCH_DMA_MINALIGN +#define IIO_DMA_MINALIGN MAX(ARCH_DMA_MINALIGN, sizeof(s64)) + +#define __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) \ + type name[ALIGN((count), sizeof(s64) / sizeof(type)) + sizeof(s64) / sizeof(type)] + +/** + * IIO_DECLARE_BUFFER_WITH_TS() - Declare a buffer with timestamp + * @type: element type of the buffer + * @name: identifier name of the buffer + * @count: number of elements in the buffer + * + * Declares a buffer that is safe to use with iio_push_to_buffer_with_ts(). In + * addition to allocating enough space for @count elements of @type, it also + * allocates space for a s64 timestamp at the end of the buffer and ensures + * proper alignment of the timestamp. + */ +#define IIO_DECLARE_BUFFER_WITH_TS(type, name, count) \ + __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(sizeof(s64)) + +/** + * IIO_DECLARE_DMA_BUFFER_WITH_TS() - Declare a DMA-aligned buffer with timestamp + * @type: element type of the buffer + * @name: identifier name of the buffer + * @count: number of elements in the buffer + * + * Same as IIO_DECLARE_BUFFER_WITH_TS(), but is uses __aligned(IIO_DMA_MINALIGN) + * to ensure that the buffer doesn't share cachelines with anything that comes + * before it in a struct. This should not be used for stack-allocated buffers + * as stack memory cannot generally be used for DMA. + */ +#define IIO_DECLARE_DMA_BUFFER_WITH_TS(type, name, count) \ + __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(IIO_DMA_MINALIGN) + struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index e6a75356567a..aa160511e265 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -44,6 +44,8 @@ struct adis_timeout { * @glob_cmd_reg: Register address of the GLOB_CMD register * @msc_ctrl_reg: Register address of the MSC_CTRL register * @diag_stat_reg: Register address of the DIAG_STAT register + * @diag_stat_size: Length (in bytes) of the DIAG_STAT register. If 0 the + * default length is 2 bytes long. * @prod_id_reg: Register address of the PROD_ID register * @prod_id: Product ID code that should be expected when reading @prod_id_reg * @self_test_mask: Bitmask of supported self-test operations @@ -70,6 +72,7 @@ struct adis_data { unsigned int glob_cmd_reg; unsigned int msc_ctrl_reg; unsigned int diag_stat_reg; + unsigned int diag_stat_size; unsigned int prod_id_reg; unsigned int prod_id; @@ -95,13 +98,28 @@ struct adis_data { }; /** + * struct adis_ops: Custom ops for adis devices. + * @write: Custom spi write implementation. + * @read: Custom spi read implementation. + * @reset: Custom sw reset implementation. The custom implementation does not + * need to sleep after the reset. It's done by the library already. + */ +struct adis_ops { + int (*write)(struct adis *adis, unsigned int reg, unsigned int value, + unsigned int size); + int (*read)(struct adis *adis, unsigned int reg, unsigned int *value, + unsigned int size); + int (*reset)(struct adis *adis); +}; + +/** * struct adis - ADIS device instance data * @spi: Reference to SPI device which owns this ADIS IIO device * @trig: IIO trigger object data * @data: ADIS chip variant specific data - * @burst: ADIS burst transfer information * @burst_extra_len: Burst extra length. Should only be used by devices that can * dynamically change their burst mode length. + * @ops: ops struct for custom read and write functions * @state_lock: Lock used by the device to protect state * @msg: SPI message object * @xfer: SPI transfer objects to be used for a @msg @@ -117,6 +135,7 @@ struct adis { const struct adis_data *data; unsigned int burst_extra_len; + const struct adis_ops *ops; /** * The state_lock is meant to be used during operations that require * a sequence of SPI R/W in order to protect the SPI transfer @@ -169,7 +188,7 @@ int __adis_read_reg(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, u8 val) { - return __adis_write_reg(adis, reg, val, 1); + return adis->ops->write(adis, reg, val, 1); } /** @@ -181,7 +200,7 @@ static inline int __adis_write_reg_8(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, u16 val) { - return __adis_write_reg(adis, reg, val, 2); + return adis->ops->write(adis, reg, val, 2); } /** @@ -193,7 +212,7 @@ static inline int __adis_write_reg_16(struct adis *adis, unsigned int reg, static inline int __adis_write_reg_32(struct adis *adis, unsigned int reg, u32 val) { - return __adis_write_reg(adis, reg, val, 4); + return adis->ops->write(adis, reg, val, 4); } /** @@ -208,7 +227,7 @@ static inline int __adis_read_reg_16(struct adis *adis, unsigned int reg, unsigned int tmp; int ret; - ret = __adis_read_reg(adis, reg, &tmp, 2); + ret = adis->ops->read(adis, reg, &tmp, 2); if (ret == 0) *val = tmp; @@ -227,7 +246,7 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, unsigned int tmp; int ret; - ret = __adis_read_reg(adis, reg, &tmp, 4); + ret = adis->ops->read(adis, reg, &tmp, 4); if (ret == 0) *val = tmp; @@ -245,7 +264,7 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg, unsigned int val, unsigned int size) { guard(mutex)(&adis->state_lock); - return __adis_write_reg(adis, reg, val, size); + return adis->ops->write(adis, reg, val, size); } /** @@ -259,7 +278,7 @@ static int adis_read_reg(struct adis *adis, unsigned int reg, unsigned int *val, unsigned int size) { guard(mutex)(&adis->state_lock); - return __adis_read_reg(adis, reg, val, size); + return adis->ops->read(adis, reg, val, size); } /** diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h index a34dcf6a6001..ce3cf0addb2e 100644 --- a/include/linux/iio/timer/stm32-lptim-trigger.h +++ b/include/linux/iio/timer/stm32-lptim-trigger.h @@ -14,6 +14,15 @@ #define LPTIM1_OUT "lptim1_out" #define LPTIM2_OUT "lptim2_out" #define LPTIM3_OUT "lptim3_out" +#define LPTIM4_OUT "lptim4_out" +#define LPTIM5_OUT "lptim5_out" + +#define LPTIM1_CH1 "lptim1_ch1" +#define LPTIM1_CH2 "lptim1_ch2" +#define LPTIM2_CH1 "lptim2_ch1" +#define LPTIM2_CH2 "lptim2_ch2" +#define LPTIM3_CH1 "lptim3_ch1" +#define LPTIM4_CH1 "lptim4_ch1" #if IS_REACHABLE(CONFIG_IIO_STM32_LPTIMER_TRIGGER) bool is_stm32_lptim_trigger(struct iio_trigger *trig); diff --git a/include/linux/iio/timer/stm32-timer-trigger.h b/include/linux/iio/timer/stm32-timer-trigger.h index 37572e4dc73a..1ee237b56183 100644 --- a/include/linux/iio/timer/stm32-timer-trigger.h +++ b/include/linux/iio/timer/stm32-timer-trigger.h @@ -72,6 +72,12 @@ #define TIM17_OC1 "tim17_oc1" +#define TIM20_OC1 "tim20_oc1" +#define TIM20_OC2 "tim20_oc2" +#define TIM20_OC3 "tim20_oc3" +#define TIM20_TRGO "tim20_trgo" +#define TIM20_TRGO2 "tim20_trgo2" + #if IS_REACHABLE(CONFIG_IIO_STM32_TIMER_TRIGGER) bool is_stm32_timer_trigger(struct iio_trigger *trig); #else diff --git a/include/linux/ima.h b/include/linux/ima.h index 0bae61a15b60..8e29cb4e6a01 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -32,6 +32,9 @@ static inline void ima_appraise_parse_cmdline(void) {} #ifdef CONFIG_IMA_KEXEC extern void ima_add_kexec_buffer(struct kimage *image); +extern void ima_kexec_post_load(struct kimage *image); +#else +static inline void ima_kexec_post_load(struct kimage *image) {} #endif #else diff --git a/include/linux/inet.h b/include/linux/inet.h index bd8276e96e60..9158772f3559 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -55,6 +55,6 @@ extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char extern int inet_pton_with_scope(struct net *net, unsigned short af, const char *src, const char *port, struct sockaddr_storage *addr); -extern bool inet_addr_is_any(struct sockaddr *addr); +bool inet_addr_is_any(struct sockaddr_storage *addr); #endif /* _LINUX_INET_H */ diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index bc7babe91b2e..bf675a8aef8a 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -4,14 +4,14 @@ #ifdef CONFIG_NOINSTR_VALIDATION +#include <linux/objtool.h> #include <linux/stringify.h> /* Begin/end of an instrumentation safe region */ #define __instrumentation_begin(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_begin\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + ANNOTATE_INSTR_BEGIN(__ASM_BREF(c)) \ + : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -48,9 +48,8 @@ */ #define __instrumentation_end(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_end\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + ANNOTATE_INSTR_END(__ASM_BREF(c)) \ + : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else /* !CONFIG_NOINSTR_VALIDATION */ diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index 771622650247..dfbf7d9d7bb5 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -100,7 +100,6 @@ void ishtp_cl_destroy_connection(struct ishtp_cl *cl, bool reset); int ishtp_cl_send(struct ishtp_cl *cl, uint8_t *buf, size_t length); int ishtp_cl_flush_queues(struct ishtp_cl *cl); int ishtp_cl_io_rb_recycle(struct ishtp_cl_rb *rb); -bool ishtp_cl_tx_empty(struct ishtp_cl *cl); struct ishtp_cl_rb *ishtp_cl_rx_get_rb(struct ishtp_cl *cl); void *ishtp_get_client_data(struct ishtp_cl *cl); void ishtp_set_client_data(struct ishtp_cl *cl, void *data); diff --git a/include/linux/intel_vsec.h b/include/linux/intel_vsec.h index b94beab64610..bc95821f1bfb 100644 --- a/include/linux/intel_vsec.h +++ b/include/linux/intel_vsec.h @@ -139,12 +139,13 @@ static inline struct intel_vsec_device *auxdev_to_ivdev(struct auxiliary_device } #if IS_ENABLED(CONFIG_INTEL_VSEC) -void intel_vsec_register(struct pci_dev *pdev, +int intel_vsec_register(struct pci_dev *pdev, struct intel_vsec_platform_info *info); #else -static inline void intel_vsec_register(struct pci_dev *pdev, +static inline int intel_vsec_register(struct pci_dev *pdev, struct intel_vsec_platform_info *info) { + return -ENODEV; } #endif #endif diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index f5aef8784692..8a2f652a05ff 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -116,8 +116,11 @@ struct icc_node { int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, u32 peak_bw, u32 *agg_avg, u32 *agg_peak); +struct icc_node *icc_node_create_dyn(void); struct icc_node *icc_node_create(int id); void icc_node_destroy(int id); +int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, const char *name); +int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node); int icc_link_create(struct icc_node *node, const int dst_id); void icc_node_add(struct icc_node *node, struct icc_provider *provider); void icc_node_del(struct icc_node *node); @@ -136,6 +139,11 @@ static inline int icc_std_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, return -ENOTSUPP; } +static inline struct icc_node *icc_node_create_dyn(void) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct icc_node *icc_node_create(int id) { return ERR_PTR(-ENOTSUPP); @@ -145,6 +153,17 @@ static inline void icc_node_destroy(int id) { } +static inline int icc_node_set_name(struct icc_node *node, const struct icc_provider *provider, + const char *name) +{ + return -EOPNOTSUPP; +} + +static inline int icc_link_nodes(struct icc_node *src_node, struct icc_node **dst_node) +{ + return -EOPNOTSUPP; +} + static inline int icc_link_create(struct icc_node *node, const int dst_id) { return -ENOTSUPP; diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index 97ac253df62c..e4b8808823ad 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -20,6 +20,9 @@ #define Mbps_to_icc(x) ((x) * 1000 / 8) #define Gbps_to_icc(x) ((x) * 1000 * 1000 / 8) +/* macro to indicate dynamic id allocation */ +#define ICC_ALLOC_DYN_ID -1 + struct icc_path; struct device; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 8cd9327e4e78..51b6484c0493 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -140,7 +140,7 @@ extern irqreturn_t no_action(int cpl, void *dev_id); /* * If a (PCI) device interrupt is not connected we set dev->irq to * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we - * can distingiush that case from other error returns. + * can distinguish that case from other error returns. * * 0x80000000 is guaranteed to be outside the available range of interrupts * and easy to distinguish from other possible incorrect values. @@ -448,7 +448,7 @@ irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, static inline void disable_irq_nosync_lockdep(unsigned int irq) { disable_irq_nosync(irq); -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_disable(); #endif } @@ -456,22 +456,14 @@ static inline void disable_irq_nosync_lockdep(unsigned int irq) static inline void disable_irq_nosync_lockdep_irqsave(unsigned int irq, unsigned long *flags) { disable_irq_nosync(irq); -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_save(*flags); #endif } -static inline void disable_irq_lockdep(unsigned int irq) -{ - disable_irq(irq); -#ifdef CONFIG_LOCKDEP - local_irq_disable(); -#endif -} - static inline void enable_irq_lockdep(unsigned int irq) { -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_enable(); #endif enable_irq(irq); @@ -479,7 +471,7 @@ static inline void enable_irq_lockdep(unsigned int irq) static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long *flags) { -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PREEMPT_RT) local_irq_restore(*flags); #endif enable_irq(irq); diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index aaa8a0767aa3..1b400f26f63d 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ if (ITSTART(node) <= last) { /* Cond1 */ \ if (start <= ITLAST(node)) /* Cond2 */ \ return node; /* node is leftmost match */ \ - if (node->ITRB.rb_right) { \ - node = rb_entry(node->ITRB.rb_right, \ - ITSTRUCT, ITRB); \ - if (start <= node->ITSUBTREE) \ - continue; \ - } \ + node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \ + continue; \ } \ return NULL; /* No match */ \ } \ diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index f32522bb3aa5..d3eade7cf663 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -101,22 +101,38 @@ static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) #ifndef ioread64 #define ioread64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define ioread64 __ioread64_hi_lo +#else #define ioread64 ioread64_hi_lo #endif +#endif #ifndef iowrite64 #define iowrite64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64 __iowrite64_hi_lo +#else #define iowrite64 iowrite64_hi_lo #endif +#endif #ifndef ioread64be #define ioread64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define ioread64be __ioread64be_hi_lo +#else #define ioread64be ioread64be_hi_lo #endif +#endif #ifndef iowrite64be #define iowrite64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64be __iowrite64be_hi_lo +#else #define iowrite64be iowrite64be_hi_lo #endif +#endif #endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index 448a21435dba..94e676ec3d3f 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -101,22 +101,38 @@ static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) #ifndef ioread64 #define ioread64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define ioread64 __ioread64_lo_hi +#else #define ioread64 ioread64_lo_hi #endif +#endif #ifndef iowrite64 #define iowrite64_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64 __iowrite64_lo_hi +#else #define iowrite64 iowrite64_lo_hi #endif +#endif #ifndef ioread64be #define ioread64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define ioread64be __ioread64be_lo_hi +#else #define ioread64be ioread64be_lo_hi #endif +#endif #ifndef iowrite64be #define iowrite64be_is_nonatomic +#if defined(CONFIG_GENERIC_IOMAP) && defined(CONFIG_64BIT) +#define iowrite64be __iowrite64be_lo_hi +#else #define iowrite64be iowrite64be_lo_hi #endif +#endif #endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ce86b09ae80f..138fbd89b1e6 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -88,6 +88,13 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. * IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits + * + * IO_PGTABLE_QUIRK_NO_WARN: Do not WARN_ON() on conflicting + * mappings, but silently return -EEXISTS. Normally an attempt + * to map over an existing mapping would indicate some sort of + * kernel bug, which would justify the WARN_ON(). But for GPU + * drivers, this could be under control of userspace. Which + * deserves an error return, but not to spam dmesg. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -97,6 +104,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) #define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8) + #define IO_PGTABLE_QUIRK_NO_WARN BIT(9) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; @@ -181,11 +189,21 @@ struct io_pgtable_cfg { }; /** + * struct arm_lpae_io_pgtable_walk_data - information from a pgtable walk + * + * @ptes: The recorded PTE values from the walk + */ +struct arm_lpae_io_pgtable_walk_data { + u64 ptes[4]; +}; + +/** * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. * * @map_pages: Map a physically contiguous range of pages of the same size. * @unmap_pages: Unmap a range of virtually contiguous pages of the same size. * @iova_to_phys: Translate iova to physical address. + * @pgtable_walk: (optional) Perform a page table walk for a given iova. * * These functions map directly onto the iommu_ops member functions with * the same names. @@ -199,6 +217,7 @@ struct io_pgtable_ops { struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, void *wd); int (*read_and_clear_dirty)(struct io_pgtable_ops *ops, unsigned long iova, size_t size, unsigned long flags, diff --git a/include/linux/io.h b/include/linux/io.h index 59ec5eea696c..0642c7ee41db 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -9,13 +9,10 @@ #include <linux/sizes.h> #include <linux/types.h> #include <linux/init.h> -#include <linux/bug.h> -#include <linux/err.h> #include <asm/io.h> #include <asm/page.h> struct device; -struct resource; #ifndef __iowrite32_copy void __iowrite32_copy(void __iomem *to, const void *from, size_t count); @@ -65,8 +62,6 @@ static inline void devm_ioport_unmap(struct device *dev, void __iomem *addr) } #endif -#define IOMEM_ERR_PTR(err) (__force void __iomem *)ERR_PTR(err) - void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, resource_size_t size); void __iomem *devm_ioremap_uc(struct device *dev, resource_size_t offset, @@ -188,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base, int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, resource_size_t size); +#ifdef CONFIG_STRICT_DEVMEM +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) + return 0; + cursor += PAGE_SIZE; + pfn++; + } + return 1; +} +#else +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 1; +} +#endif #endif /* _LINUX_IO_H */ diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index a3ce553413de..53408124c1e5 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -4,6 +4,7 @@ #include <uapi/linux/io_uring.h> #include <linux/io_uring_types.h> +#include <linux/blk-mq.h> /* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */ #define IORING_URING_CMD_CANCELABLE (1U << 30) @@ -19,7 +20,6 @@ struct io_uring_cmd { }; struct io_uring_cmd_data { - struct io_uring_sqe sqes[2]; void *op_data; }; @@ -39,7 +39,14 @@ static inline void io_uring_cmd_private_sz_check(size_t cmd_sz) #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd); + struct iov_iter *iter, + struct io_uring_cmd *ioucmd, + unsigned int issue_flags); +int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, + const struct iovec __user *uvec, + size_t uvec_segs, + int ddir, struct iov_iter *iter, + unsigned issue_flags); /* * Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd @@ -66,8 +73,18 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd, void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd); #else -static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd) +static inline int +io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, + struct iov_iter *iter, struct io_uring_cmd *ioucmd, + unsigned int issue_flags) +{ + return -EOPNOTSUPP; +} +static inline int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd, + const struct iovec __user *uvec, + size_t uvec_segs, + int ddir, struct iov_iter *iter, + unsigned issue_flags) { return -EOPNOTSUPP; } @@ -123,4 +140,19 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur return cmd_to_io_kiocb(cmd)->async_data; } +/* + * Return uring_cmd's context reference as its context handle for driver to + * track per-context resource, such as registered kernel IO buffer + */ +static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) +{ + return cmd_to_io_kiocb(cmd)->ctx; +} + +int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, + void (*release)(void *), unsigned int index, + unsigned int issue_flags); +int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index, + unsigned int issue_flags); + #endif /* _LINUX_IO_URING_CMD_H */ diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index fd4cdb0860a2..a7efcec2e3d0 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -78,8 +78,9 @@ struct io_hash_table { struct io_mapped_region { struct page **pages; - void *vmap_ptr; - size_t nr_pages; + void *ptr; + unsigned nr_pages; + unsigned flags; }; /* @@ -107,6 +108,14 @@ struct io_uring_task { } ____cacheline_aligned_in_smp; }; +struct iou_vec { + union { + struct iovec *iovec; + struct bio_vec *bvec; + }; + unsigned nr; /* number of struct iovec it can hold */ +}; + struct io_uring { u32 head; u32 tail; @@ -221,7 +230,8 @@ struct io_alloc_cache { void **entries; unsigned int nr_cached; unsigned int max_cached; - size_t elem_size; + unsigned int elem_size; + unsigned int init_clear; }; struct io_ring_ctx { @@ -290,16 +300,23 @@ struct io_ring_ctx { struct io_file_table file_table; struct io_rsrc_data buf_table; + struct io_alloc_cache node_cache; + struct io_alloc_cache imu_cache; struct io_submit_state submit_state; + /* + * Modifications are protected by ->uring_lock and ->mmap_lock. + * The flags, buf_pages and buf_nr_pages fields should be stable + * once published. + */ struct xarray io_bl_xa; struct io_hash_table cancel_table; struct io_alloc_cache apoll_cache; struct io_alloc_cache netmsg_cache; struct io_alloc_cache rw_cache; - struct io_alloc_cache uring_cache; + struct io_alloc_cache cmd_cache; /* * Any cancelable uring_cmd is added to this list in @@ -324,7 +341,6 @@ struct io_ring_ctx { unsigned cached_cq_tail; unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; - unsigned cq_extra; void *cq_wait_arg; size_t cq_wait_size; @@ -353,7 +369,6 @@ struct io_ring_ctx { spinlock_t completion_lock; - struct list_head io_buffers_comp; struct list_head cq_overflow_list; struct hlist_head waitid_list; @@ -372,12 +387,13 @@ struct io_ring_ctx { unsigned int file_alloc_start; unsigned int file_alloc_end; - struct list_head io_buffers_cache; - /* Keep this last, we don't need it for the fast path */ struct wait_queue_head poll_wq; struct io_restriction restrictions; + /* Stores zcrx object pointers of type struct io_zcrx_ifq */ + struct xarray zcrx_ctxs; + u32 pers_next; struct xarray personalities; @@ -400,6 +416,7 @@ struct io_ring_ctx { struct callback_head poll_wq_task_work; struct list_head defer_list; + unsigned nr_drained; struct io_alloc_cache msg_cache; spinlock_t msg_lock; @@ -418,29 +435,30 @@ struct io_ring_ctx { /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; + unsigned nr_req_allocated; /* * Protection for resize vs mmap races - both the mmap and resize * side will need to grab this lock, to prevent either side from * being run concurrently with the other. */ - struct mutex resize_lock; - - /* - * If IORING_SETUP_NO_MMAP is used, then the below holds - * the gup'ed pages for the two rings, and the sqes. - */ - unsigned short n_ring_pages; - unsigned short n_sqe_pages; - struct page **ring_pages; - struct page **sqe_pages; + struct mutex mmap_lock; + struct io_mapped_region sq_region; + struct io_mapped_region ring_region; /* used for optimised request parameter and wait argument passing */ struct io_mapped_region param_region; }; +/* + * Token indicating function is called in task work context: + * ctx->uring_lock is held and any completions generated will be flushed. + * ONLY core io_uring.c should instantiate this struct. + */ struct io_tw_state { }; +/* Alias to use in code that doesn't instantiate struct io_tw_state */ +typedef struct io_tw_state io_tw_token_t; enum { REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, @@ -470,6 +488,7 @@ enum { REQ_F_SKIP_LINK_CQES_BIT, REQ_F_SINGLE_POLL_BIT, REQ_F_DOUBLE_POLL_BIT, + REQ_F_MULTISHOT_BIT, REQ_F_APOLL_MULTISHOT_BIT, REQ_F_CLEAR_POLLIN_BIT, /* keep async read/write and isreg together and in order */ @@ -481,6 +500,8 @@ enum { REQ_F_BL_NO_RECYCLE_BIT, REQ_F_BUFFERS_COMMIT_BIT, REQ_F_BUF_NODE_BIT, + REQ_F_HAS_METADATA_BIT, + REQ_F_IMPORT_BUFFER_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -545,6 +566,8 @@ enum { REQ_F_SINGLE_POLL = IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT), /* double poll may active */ REQ_F_DOUBLE_POLL = IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT), + /* request posts multiple completions, should be set at prep time */ + REQ_F_MULTISHOT = IO_REQ_FLAG(REQ_F_MULTISHOT_BIT), /* fast poll multishot mode */ REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT), /* recvmsg special flag, clear EPOLLIN */ @@ -561,9 +584,16 @@ enum { REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT), /* buf node is valid */ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT), + /* request has read/write metadata assigned */ + REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT), + /* + * For vectored fixed buffers, resolve iovec to registered buffers. + * For SEND_ZC, whether to import buffers (i.e. the first issue). + */ + REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT), }; -typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts); +typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw); struct io_task_work { struct llist_node node; @@ -598,7 +628,11 @@ static inline void io_kiocb_cmd_sz_check(size_t cmd_sz) io_kiocb_cmd_sz_check(sizeof(cmd_type)) , \ ((cmd_type *)&(req)->cmd) \ ) -#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr) + +static inline struct io_kiocb *cmd_to_io_kiocb(void *ptr) +{ + return ptr; +} struct io_kiocb { union { @@ -617,8 +651,7 @@ struct io_kiocb { u8 iopoll_completed; /* * Can be either a fixed buffer index, or used with provided buffers. - * For the latter, before issue it points to the buffer group ID, - * and after selection it points to the buffer ID itself. + * For the latter, it points to the selected buffer ID. */ u16 buf_index; @@ -665,6 +698,8 @@ struct io_kiocb { struct hlist_node hash_node; /* For IOPOLL setup queues, with hybrid polling */ u64 iopoll_start; + /* for private io_kiocb freeing */ + struct rcu_head rcu_head; }; /* internal polling, see IORING_FEAT_FAST_POLL */ struct async_poll *apoll; @@ -677,7 +712,7 @@ struct io_kiocb { const struct cred *creds; struct io_wq_work work; - struct { + struct io_big_cqe { u64 extra1; u64 extra2; } big_cqe; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 75bf54e76f3b..522644d62f30 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -56,6 +56,13 @@ struct vm_fault; * * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must * never be merged with the mapping before it. + * + * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block + * assigned to it yet and the file system will do that in the bio submission + * handler, splitting the I/O as needed. + * + * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic + * bio, i.e. set REQ_ATOMIC. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -68,6 +75,13 @@ struct vm_fault; #endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) #define IOMAP_F_BOUNDARY (1U << 6) +#define IOMAP_F_ANON_WRITE (1U << 7) +#define IOMAP_F_ATOMIC_BIO (1U << 8) + +/* + * Flag reserved for file system specific usage + */ +#define IOMAP_F_PRIVATE (1U << 12) /* * Flags set by the core iomap code during operations: @@ -79,14 +93,8 @@ struct vm_fault; * range it covers needs to be remapped by the high level before the operation * can proceed. */ -#define IOMAP_F_SIZE_CHANGED (1U << 8) -#define IOMAP_F_STALE (1U << 9) - -/* - * Flags from 0x1000 up are for file system specific usage: - */ -#define IOMAP_F_PRIVATE (1U << 12) - +#define IOMAP_F_SIZE_CHANGED (1U << 14) +#define IOMAP_F_STALE (1U << 15) /* * Magic value for addr: @@ -111,6 +119,8 @@ struct iomap { static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) { + if (iomap->flags & IOMAP_F_ANON_WRITE) + return U64_MAX; /* invalid */ return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; } @@ -182,7 +192,8 @@ struct iomap_folio_ops { #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ -#define IOMAP_ATOMIC (1 << 9) +#define IOMAP_ATOMIC (1 << 9) /* torn-write protection */ +#define IOMAP_DONTCACHE (1 << 10) struct iomap_ops { /* @@ -211,8 +222,10 @@ struct iomap_ops { * calls to iomap_iter(). Treat as read-only in the body. * @len: The remaining length of the file segment we're operating on. * It is updated at the same time as @pos. - * @processed: The number of bytes processed by the body in the most recent - * iteration, or a negative errno. 0 causes the iteration to stop. + * @iter_start_pos: The original start pos for the current iomap. Used for + * incremental iter advance. + * @status: Status of the most recent iteration. Zero on success or a negative + * errno on error. * @flags: Zero or more of the iomap_begin flags above. * @iomap: Map describing the I/O iteration * @srcmap: Source map for COW operations @@ -221,7 +234,8 @@ struct iomap_iter { struct inode *inode; loff_t pos; u64 len; - s64 processed; + loff_t iter_start_pos; + int status; unsigned flags; struct iomap iomap; struct iomap srcmap; @@ -229,20 +243,46 @@ struct iomap_iter { }; int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops); +int iomap_iter_advance(struct iomap_iter *iter, u64 *count); /** - * iomap_length - length of the current iomap iteration + * iomap_length_trim - trimmed length of the current iomap iteration * @iter: iteration structure + * @pos: File position to trim from. + * @len: Length of the mapping to trim to. * - * Returns the length that the operation applies to for the current iteration. + * Returns a trimmed length that the operation applies to for the current + * iteration. */ -static inline u64 iomap_length(const struct iomap_iter *iter) +static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos, + u64 len) { u64 end = iter->iomap.offset + iter->iomap.length; if (iter->srcmap.type != IOMAP_HOLE) end = min(end, iter->srcmap.offset + iter->srcmap.length); - return min(iter->len, end - iter->pos); + return min(len, end - pos); +} + +/** + * iomap_length - length of the current iomap iteration + * @iter: iteration structure + * + * Returns the length that the operation applies to for the current iteration. + */ +static inline u64 iomap_length(const struct iomap_iter *iter) +{ + return iomap_length_trim(iter, iter->pos, iter->len); +} + +/** + * iomap_iter_advance_full - advance by the full length of current map + */ +static inline int iomap_iter_advance_full(struct iomap_iter *iter) +{ + u64 length = iomap_length(iter); + + return iomap_iter_advance(iter, &length); } /** @@ -306,12 +346,11 @@ bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, - bool *did_zero, const struct iomap_ops *ops); + bool *did_zero, const struct iomap_ops *ops, void *private); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - const struct iomap_ops *ops); -vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, - const struct iomap_ops *ops); - + const struct iomap_ops *ops, void *private); +vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, + void *private); typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, struct iomap *iomap); void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, @@ -328,16 +367,45 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops); /* + * Flags for iomap_ioend->io_flags. + */ +/* shared COW extent */ +#define IOMAP_IOEND_SHARED (1U << 0) +/* unwritten extent */ +#define IOMAP_IOEND_UNWRITTEN (1U << 1) +/* don't merge into previous ioend */ +#define IOMAP_IOEND_BOUNDARY (1U << 2) +/* is direct I/O */ +#define IOMAP_IOEND_DIRECT (1U << 3) +/* is DONTCACHE I/O */ +#define IOMAP_IOEND_DONTCACHE (1U << 4) + +/* + * Flags that if set on either ioend prevent the merge of two ioends. + * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) + */ +#define IOMAP_IOEND_NOMERGE_FLAGS \ + (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \ + IOMAP_IOEND_DONTCACHE) + +/* * Structure for writeback I/O completions. + * + * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io + * for direct I/O) can split a bio generated by iomap. In that case the parent + * ioend it was split from is recorded in ioend->io_parent. */ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ - u16 io_type; - u16 io_flags; /* IOMAP_F_* */ + u16 io_flags; /* IOMAP_IOEND_* */ struct inode *io_inode; /* file being written to */ - size_t io_size; /* size of data within eof */ + size_t io_size; /* size of the extent */ + atomic_t io_remaining; /* completetion defer count */ + int io_error; /* stashed away status */ + struct iomap_ioend *io_parent; /* parent for completions */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ + void *io_private; /* file system private data */ struct bio io_bio; /* MUST BE LAST! */ }; @@ -362,12 +430,14 @@ struct iomap_writeback_ops { loff_t offset, unsigned len); /* - * Optional, allows the file systems to perform actions just before - * submitting the bio and/or override the bio end_io handler for complex - * operations like copy on write extent manipulation or unwritten extent - * conversions. + * Optional, allows the file systems to hook into bio submission, + * including overriding the bi_end_io handler. + * + * Returns 0 if the bio was successfully submitted, or a negative + * error code if status was non-zero or another error happened and + * the bio could not be submitted. */ - int (*prepare_ioend)(struct iomap_ioend *ioend, int status); + int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); /* * Optional, allows the file system to discard state on a page where @@ -383,6 +453,10 @@ struct iomap_writepage_ctx { u32 nr_folios; /* folios added to the ioend */ }; +struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, + loff_t file_offset, u16 ioend_flags); +struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, + unsigned int max_len, bool is_append); void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); @@ -454,4 +528,6 @@ int iomap_swapfile_activate(struct swap_info_struct *sis, # define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO) #endif /* CONFIG_SWAP */ +extern struct bio_set iomap_ioend_bioset; + #endif /* LINUX_IOMAP_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 318d27841130..156732807994 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -41,9 +41,12 @@ struct iommu_dirty_ops; struct notifier_block; struct iommu_sva; struct iommu_dma_cookie; +struct iommu_dma_msi_cookie; struct iommu_fault_param; struct iommufd_ctx; struct iommufd_viommu; +struct msi_desc; +struct msi_msg; #define IOMMU_FAULT_PERM_READ (1 << 0) /* read */ #define IOMMU_FAULT_PERM_WRITE (1 << 1) /* write */ @@ -163,6 +166,15 @@ struct iommu_domain_geometry { bool force_aperture; /* DMA only allowed in mappable range? */ }; +enum iommu_domain_cookie_type { + IOMMU_COOKIE_NONE, + IOMMU_COOKIE_DMA_IOVA, + IOMMU_COOKIE_DMA_MSI, + IOMMU_COOKIE_FAULT_HANDLER, + IOMMU_COOKIE_SVA, + IOMMU_COOKIE_IOMMUFD, +}; + /* Domain feature flags */ #define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */ #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API @@ -209,15 +221,18 @@ struct iommu_domain_geometry { struct iommu_domain { unsigned type; + enum iommu_domain_cookie_type cookie_type; const struct iommu_domain_ops *ops; const struct iommu_dirty_ops *dirty_ops; const struct iommu_ops *owner; /* Whose domain_alloc we came from */ unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */ struct iommu_domain_geometry geometry; - struct iommu_dma_cookie *iova_cookie; int (*iopf_handler)(struct iopf_group *group); - void *fault_data; - union { + + union { /* cookie */ + struct iommu_dma_cookie *iova_cookie; + struct iommu_dma_msi_cookie *msi_cookie; + struct iommufd_hw_pagetable *iommufd_hwpt; struct { iommu_fault_handler_t handler; void *handler_token; @@ -301,23 +316,6 @@ struct iommu_iort_rmr_data { u32 num_sids; }; -/** - * enum iommu_dev_features - Per device IOMMU features - * @IOMMU_DEV_FEAT_SVA: Shared Virtual Addresses - * @IOMMU_DEV_FEAT_IOPF: I/O Page Faults such as PRI or Stall. Generally - * enabling %IOMMU_DEV_FEAT_SVA requires - * %IOMMU_DEV_FEAT_IOPF, but some devices manage I/O Page - * Faults themselves instead of relying on the IOMMU. When - * supported, this feature must be enabled before and - * disabled after %IOMMU_DEV_FEAT_SVA. - * - * Device drivers enable a feature using iommu_dev_enable_feature(). - */ -enum iommu_dev_features { - IOMMU_DEV_FEAT_SVA, - IOMMU_DEV_FEAT_IOPF, -}; - #define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ #define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */ #define IOMMU_PASID_INVALID (-1U) @@ -326,6 +324,18 @@ typedef unsigned int ioasid_t; /* Read but do not clear any dirty bits */ #define IOMMU_DIRTY_NO_CLEAR (1 << 0) +/* + * Pages allocated through iommu_alloc_pages_node_sz() can be placed on this + * list using iommu_pages_list_add(). Note: ONLY pages from + * iommu_alloc_pages_node_sz() can be used this way! + */ +struct iommu_pages_list { + struct list_head pages; +}; + +#define IOMMU_PAGES_LIST_INIT(name) \ + ((struct iommu_pages_list){ .pages = LIST_HEAD_INIT(name.pages) }) + #ifdef CONFIG_IOMMU_API /** @@ -348,7 +358,7 @@ struct iommu_iotlb_gather { unsigned long start; unsigned long end; size_t pgsize; - struct list_head freelist; + struct iommu_pages_list freelist; bool queued; }; @@ -425,10 +435,10 @@ static inline int __iommu_copy_struct_from_user( void *dst_data, const struct iommu_user_data *src_data, unsigned int data_type, size_t data_len, size_t min_len) { - if (src_data->type != data_type) - return -EINVAL; if (WARN_ON(!dst_data || !src_data)) return -EINVAL; + if (src_data->type != data_type) + return -EINVAL; if (src_data->len < min_len || data_len < src_data->len) return -EINVAL; return copy_struct_from_user(dst_data, data_len, src_data->uptr, @@ -441,8 +451,8 @@ static inline int __iommu_copy_struct_from_user( * include/uapi/linux/iommufd.h * @user_data: Pointer to a struct iommu_user_data for user space data info * @data_type: The data type of the @kdst. Must match with @user_data->type - * @min_last: The last memember of the data structure @kdst points in the - * initial version. + * @min_last: The last member of the data structure @kdst points in the initial + * version. * Return 0 for success, otherwise -error. */ #define iommu_copy_struct_from_user(kdst, user_data, data_type, min_last) \ @@ -554,9 +564,10 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * op is allocated in the iommu driver and freed by the caller after * use. The information type is one of enum iommu_hw_info_type defined * in include/uapi/linux/iommufd.h. - * @domain_alloc: allocate and return an iommu domain if success. Otherwise - * NULL is returned. The domain is not fully initialized until - * the caller iommu_domain_alloc() returns. + * @domain_alloc: Do not use in new drivers + * @domain_alloc_identity: allocate an IDENTITY domain. Drivers should prefer to + * use identity_domain instead. This should only be used + * if dynamic logic is necessary. * @domain_alloc_paging_flags: Allocate an iommu domain corresponding to the * input parameters as defined in * include/uapi/linux/iommufd.h. The @user_data can be @@ -579,17 +590,12 @@ iommu_copy_struct_from_full_user_array(void *kdst, size_t kdst_entry_size, * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) - * @dev_enable/disable_feat: per device entries to enable/disable - * iommu specific features. * @page_response: handle page request response * @def_domain_type: device default domain type, return value: * - IOMMU_DOMAIN_IDENTITY: must use an identity domain * - IOMMU_DOMAIN_DMA: must use a dma domain * - 0: use the default setting * @default_domain_ops: the default ops for domains - * @remove_dev_pasid: Remove any translation configurations of a specific - * pasid, so that any DMA transactions with this pasid - * will be blocked by the hardware. * @viommu_alloc: Allocate an iommufd_viommu on a physical IOMMU instance behind * the @dev, as the set of virtualization resources shared/passed * to user space IOMMU instance. And associate it with a nesting @@ -617,7 +623,10 @@ struct iommu_ops { void *(*hw_info)(struct device *dev, u32 *length, u32 *type); /* Domain allocation and freeing by the iommu driver */ +#if IS_ENABLED(CONFIG_FSL_PAMU) struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); +#endif + struct iommu_domain *(*domain_alloc_identity)(struct device *dev); struct iommu_domain *(*domain_alloc_paging_flags)( struct device *dev, u32 flags, const struct iommu_user_data *user_data); @@ -640,15 +649,10 @@ struct iommu_ops { bool (*is_attach_deferred)(struct device *dev); /* Per device IOMMU features */ - int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f); - int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f); - void (*page_response)(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); int (*def_domain_type)(struct device *dev); - void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain); struct iommufd_viommu *(*viommu_alloc)( struct device *dev, struct iommu_domain *parent_domain, @@ -740,6 +744,7 @@ struct iommu_domain_ops { * @dev: struct device for sysfs handling * @singleton_group: Used internally for drivers that have only one group * @max_pasids: number of supported PASIDs + * @ready: set once iommu_device_register() has completed successfully */ struct iommu_device { struct list_head list; @@ -748,6 +753,7 @@ struct iommu_device { struct device *dev; struct iommu_group *singleton_group; u32 max_pasids; + bool ready; }; /** @@ -842,7 +848,7 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { *gather = (struct iommu_iotlb_gather) { .start = ULONG_MAX, - .freelist = LIST_HEAD_INIT(gather->freelist), + .freelist = IOMMU_PAGES_LIST_INIT(gather->freelist), }; } @@ -862,6 +868,10 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, @@ -1084,7 +1094,6 @@ struct iommu_mm_data { }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); -void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) @@ -1114,9 +1123,6 @@ void dev_iommu_priv_set(struct device *dev, void *priv); extern struct mutex iommu_probe_device_lock; int iommu_probe_device(struct device *dev); -int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f); -int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f); - int iommu_device_use_default_domain(struct device *dev); void iommu_device_unuse_default_domain(struct device *dev); @@ -1395,28 +1401,12 @@ static inline int iommu_fwspec_init(struct device *dev, return -ENODEV; } -static inline void iommu_fwspec_free(struct device *dev) -{ -} - static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids) { return -ENODEV; } -static inline int -iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) -{ - return -ENODEV; -} - -static inline int -iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) -{ - return -ENODEV; -} - static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { return NULL; @@ -1475,6 +1465,18 @@ static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) static inline void iommu_free_global_pasid(ioasid_t pasid) {} #endif /* CONFIG_IOMMU_API */ +#ifdef CONFIG_IRQ_MSI_IOMMU +#ifdef CONFIG_IOMMU_API +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); +#else +static inline int iommu_dma_prepare_msi(struct msi_desc *desc, + phys_addr_t msi_addr) +{ + return 0; +} +#endif /* CONFIG_IOMMU_API */ +#endif /* CONFIG_IRQ_MSI_IOMMU */ + #if IS_ENABLED(CONFIG_LOCKDEP) && IS_ENABLED(CONFIG_IOMMU_API) void iommu_group_mutex_assert(struct device *dev); #else @@ -1508,32 +1510,12 @@ static inline void iommu_debugfs_setup(void) {} #endif #ifdef CONFIG_IOMMU_DMA -#include <linux/msi.h> - int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); - -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); - #else /* CONFIG_IOMMU_DMA */ - -struct msi_desc; -struct msi_msg; - static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) { return -ENODEV; } - -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) -{ - return 0; -} - -static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{ -} - #endif /* CONFIG_IOMMU_DMA */ /* diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 11110c749200..34b6e6ca4bfa 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -8,9 +8,11 @@ #include <linux/err.h> #include <linux/errno.h> +#include <linux/iommu.h> #include <linux/refcount.h> #include <linux/types.h> #include <linux/xarray.h> +#include <uapi/linux/iommufd.h> struct device; struct file; @@ -34,6 +36,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_FAULT, IOMMUFD_OBJ_VIOMMU, IOMMUFD_OBJ_VDEVICE, + IOMMUFD_OBJ_VEVENTQ, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -52,9 +55,11 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, struct device *dev, u32 *id); void iommufd_device_unbind(struct iommufd_device *idev); -int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id); -int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id); -void iommufd_device_detach(struct iommufd_device *idev); +int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid, + u32 *pt_id); +void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid); struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev); u32 iommufd_device_to_id(struct iommufd_device *idev); @@ -93,6 +98,8 @@ struct iommufd_viommu { const struct iommufd_viommu_ops *ops; struct xarray vdevs; + struct list_head veventqs; + struct rw_semaphore veventqs_rwsem; unsigned int type; }; @@ -187,6 +194,11 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, enum iommufd_object_type type); struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id); +int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, unsigned long *vdev_id); +int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, void *event_data, + size_t data_len); #else /* !CONFIG_IOMMUFD_DRIVER_CORE */ static inline struct iommufd_object * _iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, @@ -200,6 +212,20 @@ iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { return NULL; } + +static inline int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, + struct device *dev, + unsigned long *vdev_id) +{ + return -ENOENT; +} + +static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu, + enum iommu_veventq_type type, + void *event_data, size_t data_len) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_IOMMUFD_DRIVER_CORE */ /* diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 5385349f0b8a..e8b2d6aa4013 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -154,15 +154,20 @@ enum { }; /* helpers to define resources */ -#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ +#define DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, _desc) \ (struct resource) { \ .start = (_start), \ .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ - .desc = IORES_DESC_NONE, \ + .desc = (_desc), \ } +#define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ + DEFINE_RES_NAMED_DESC(_start, _size, _name, _flags, IORES_DESC_NONE) +#define DEFINE_RES(_start, _size, _flags) \ + DEFINE_RES_NAMED(_start, _size, NULL, _flags) + #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ DEFINE_RES_NAMED((_start), (_size), (_name), IORESOURCE_IO) #define DEFINE_RES_IO(_start, _size) \ diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index b25377b6ea98..5210e8371238 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,7 +60,8 @@ static inline int __get_task_ioprio(struct task_struct *p) int prio; if (!ioc) - return IOPRIO_DEFAULT; + return IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); if (p != current) lockdep_assert_held(&p->alloc_lock); diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index a1c9c0d48ebf..7da6602eab71 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -93,7 +93,8 @@ struct ipmi_user_hndl { /* * Called when the interface detects a watchdog pre-timeout. If - * this is NULL, it will be ignored for the user. + * this is NULL, it will be ignored for the user. Note that you + * can't do any IPMI calls from here, it's called with locks held. */ void (*ipmi_watchdog_pretimeout)(void *handler_data); @@ -126,7 +127,7 @@ int ipmi_create_user(unsigned int if_num, * the users before you destroy the callback structures, it should be * safe, too. */ -int ipmi_destroy_user(struct ipmi_user *user); +void ipmi_destroy_user(struct ipmi_user *user); /* Get the IPMI version of the BMC we are talking to. */ int ipmi_get_version(struct ipmi_user *user, @@ -343,4 +344,14 @@ extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data); /* Helper function for computing the IPMB checksum of some data. */ unsigned char ipmb_checksum(unsigned char *data, int size); +/* + * For things that must send messages at panic time, like the IPMI watchdog + * driver that extends the reset time on a panic, use this to send messages + * from panic context. Note that this puts the driver into a mode that + * only works at panic time, so only use it then. + */ +void ipmi_panic_request_and_wait(struct ipmi_user *user, + struct ipmi_addr *addr, + struct kernel_ipmi_msg *msg); + #endif /* __LINUX_IPMI_H */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index a6e2aadbb91b..5aeeed22f35b 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -207,6 +207,7 @@ struct inet6_cork { struct ipv6_txoptions *opt; u8 hop_limit; u8 tclass; + u8 dontfrag:1; }; /* struct ipv6_pinfo - ipv6 private area */ diff --git a/include/linux/irq.h b/include/linux/irq.h index fa711f80957b..1d6b606a81ef 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -64,7 +64,6 @@ enum irqchip_irq_state; * IRQ_NOAUTOEN - Interrupt is not automatically enabled in * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) - * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context * IRQ_NESTED_THREAD - Interrupt nests into another thread * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable * IRQ_IS_POLLED - Always polled by another interrupt. Exclude @@ -93,7 +92,6 @@ enum { IRQ_NOREQUEST = (1 << 11), IRQ_NOAUTOEN = (1 << 12), IRQ_NO_BALANCING = (1 << 13), - IRQ_MOVE_PCNTXT = (1 << 14), IRQ_NESTED_THREAD = (1 << 15), IRQ_NOTHREAD = (1 << 16), IRQ_PER_CPU_DEVID = (1 << 17), @@ -105,7 +103,7 @@ enum { #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ - IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ + IRQ_NOAUTOEN | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_HIDDEN) @@ -201,8 +199,6 @@ struct irq_data { * IRQD_LEVEL - Interrupt is level triggered * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup * from suspend - * IRQD_MOVE_PCNTXT - Interrupt can be moved in process - * context * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt @@ -233,7 +229,6 @@ enum { IRQD_AFFINITY_SET = BIT(12), IRQD_LEVEL = BIT(13), IRQD_WAKEUP_STATE = BIT(14), - IRQD_MOVE_PCNTXT = BIT(15), IRQD_IRQ_DISABLED = BIT(16), IRQD_IRQ_MASKED = BIT(17), IRQD_IRQ_INPROGRESS = BIT(18), @@ -338,11 +333,6 @@ static inline bool irqd_is_wakeup_set(struct irq_data *d) return __irqd_to_state(d) & IRQD_WAKEUP_STATE; } -static inline bool irqd_can_move_in_process_context(struct irq_data *d) -{ - return __irqd_to_state(d) & IRQD_MOVE_PCNTXT; -} - static inline bool irqd_irq_disabled(struct irq_data *d) { return __irqd_to_state(d) & IRQD_IRQ_DISABLED; @@ -496,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @ipi_send_mask: send an IPI to destination cpus in cpumask * @irq_nmi_setup: function called from core code before enabling an NMI * @irq_nmi_teardown: function called from core code after disabling an NMI + * @irq_force_complete_move: optional function to force complete pending irq move * @flags: chip specific flags */ struct irq_chip { @@ -547,6 +538,8 @@ struct irq_chip { int (*irq_nmi_setup)(struct irq_data *data); void (*irq_nmi_teardown)(struct irq_data *data); + void (*irq_force_complete_move)(struct irq_data *data); + unsigned long flags; }; @@ -567,6 +560,7 @@ struct irq_chip { * in the suspend path if they are in disabled state * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup * IRQCHIP_IMMUTABLE: Don't ever change anything in this chip + * IRQCHIP_MOVE_DEFERRED: Move the interrupt in actual interrupt context */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -581,6 +575,7 @@ enum { IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), IRQCHIP_IMMUTABLE = (1 << 11), + IRQCHIP_MOVE_DEFERRED = (1 << 12), }; #include <linux/irqdesc.h> @@ -602,7 +597,6 @@ enum { struct irqaction; extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); -extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); @@ -620,6 +614,7 @@ extern int irq_affinity_online_cpu(unsigned int cpu); #endif #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) +bool irq_can_move_in_process_context(struct irq_data *data); void __irq_move_irq(struct irq_data *data); static inline void irq_move_irq(struct irq_data *data) { @@ -627,11 +622,10 @@ static inline void irq_move_irq(struct irq_data *data) __irq_move_irq(data); } void irq_move_masked_irq(struct irq_data *data); -void irq_force_complete_move(struct irq_desc *desc); #else +static inline bool irq_can_move_in_process_context(struct irq_data *data) { return true; } static inline void irq_move_irq(struct irq_data *data) { } static inline void irq_move_masked_irq(struct irq_data *data) { } -static inline void irq_force_complete_move(struct irq_desc *desc) { } #endif extern int no_irq_affinity; @@ -694,6 +688,9 @@ extern int irq_chip_request_resources_parent(struct irq_data *data); extern void irq_chip_release_resources_parent(struct irq_data *data); #endif +/* Disable or mask interrupts during a kernel kexec */ +extern void machine_kexec_mask_interrupts(void); + /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); @@ -702,7 +699,7 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); extern int noirqdebug_setup(char *str); /* Checks whether the interrupt can be requested by request_irq(): */ -extern int can_request_irq(unsigned int irq, unsigned long irqflags); +extern bool can_request_irq(unsigned int irq, unsigned long irqflags); /* Dummy irq-chip implementations: */ extern struct irq_chip no_irq_chip; @@ -1224,31 +1221,6 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) -#ifdef CONFIG_SMP -static inline void irq_gc_lock(struct irq_chip_generic *gc) -{ - raw_spin_lock(&gc->lock); -} - -static inline void irq_gc_unlock(struct irq_chip_generic *gc) -{ - raw_spin_unlock(&gc->lock); -} -#else -static inline void irq_gc_lock(struct irq_chip_generic *gc) { } -static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } -#endif - -/* - * The irqsave variants are for usage in non interrupt code. Do not use - * them in irq_chip callbacks. Use irq_gc_lock() instead. - */ -#define irq_gc_lock_irqsave(gc, flags) \ - raw_spin_lock_irqsave(&(gc)->lock, flags) - -#define irq_gc_unlock_irqrestore(gc, flags) \ - raw_spin_unlock_irqrestore(&(gc)->lock, flags) - static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h deleted file mode 100644 index ea4e087fac98..000000000000 --- a/include/linux/irqchip/irq-davinci-aintc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_AINTC_ -#define _LINUX_IRQ_DAVINCI_AINTC_ - -#include <linux/ioport.h> - -/** - * struct davinci_aintc_config - configuration data for davinci-aintc driver. - * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - * @prios: an array of size num_irqs containing priority settings for - * each interrupt - */ -struct davinci_aintc_config { - struct resource reg; - unsigned int num_irqs; - u8 *prios; -}; - -void davinci_aintc_init(const struct davinci_aintc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_AINTC_ */ diff --git a/include/linux/irqchip/irq-davinci-cp-intc.h b/include/linux/irqchip/irq-davinci-cp-intc.h deleted file mode 100644 index 8d71ed5b5a61..000000000000 --- a/include/linux/irqchip/irq-davinci-cp-intc.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2019 Texas Instruments - */ - -#ifndef _LINUX_IRQ_DAVINCI_CP_INTC_ -#define _LINUX_IRQ_DAVINCI_CP_INTC_ - -#include <linux/ioport.h> - -/** - * struct davinci_cp_intc_config - configuration data for davinci-cp-intc - * driver. - * - * @reg: register range to map - * @num_irqs: number of HW interrupts supported by the controller - */ -struct davinci_cp_intc_config { - struct resource reg; - unsigned int num_irqs; -}; - -int davinci_cp_intc_init(const struct davinci_cp_intc_config *config); - -#endif /* _LINUX_IRQ_DAVINCI_CP_INTC_ */ diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h new file mode 100644 index 000000000000..224ac28e88d7 --- /dev/null +++ b/include/linux/irqchip/irq-msi-lib.h @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022 Linutronix GmbH +// Copyright (C) 2022 Intel + +#ifndef _IRQCHIP_IRQ_MSI_LIB_H +#define _IRQCHIP_IRQ_MSI_LIB_H + +#include <linux/bits.h> +#include <linux/irqdomain.h> +#include <linux/msi.h> + +#ifdef CONFIG_PCI_MSI +#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI) +#else +#define MATCH_PCI_MSI (0) +#endif + +#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) + +struct msi_domain_info; +int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); + +bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, + struct msi_domain_info *info); + +#endif /* _IRQCHIP_IRQ_MSI_LIB_H */ diff --git a/include/linux/irqchip/irq-renesas-rzv2h.h b/include/linux/irqchip/irq-renesas-rzv2h.h new file mode 100644 index 000000000000..618a60d2eac0 --- /dev/null +++ b/include/linux/irqchip/irq-renesas-rzv2h.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Renesas RZ/V2H(P) Interrupt Control Unit (ICU) + * + * Copyright (C) 2025 Renesas Electronics Corporation. + */ + +#ifndef __LINUX_IRQ_RENESAS_RZV2H +#define __LINUX_IRQ_RENESAS_RZV2H + +#include <linux/platform_device.h> + +#define RZV2H_ICU_DMAC_REQ_NO_DEFAULT 0x3ff + +#ifdef CONFIG_RENESAS_RZV2H_ICU +void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, u8 dmac_channel, + u16 req_no); +#else +static inline void rzv2h_icu_register_dma_req(struct platform_device *icu_dev, u8 dmac_index, + u8 dmac_channel, u16 req_no) { } +#endif + +#endif /* __LINUX_IRQ_RENESAS_RZV2H */ diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e432b6a12a32..7387d183029b 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -1,30 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * irq_domain - IRQ translation domains + * irq_domain - IRQ Translation Domains * - * Translation infrastructure between hw and linux irq numbers. This is - * helpful for interrupt controllers to implement mapping between hardware - * irq numbers and the Linux irq number space. - * - * irq_domains also have hooks for translating device tree or other - * firmware interrupt representations into a hardware irq number that - * can be mapped back to a Linux irq number without any extra platform - * support code. - * - * Interrupt controller "domain" data structure. This could be defined as a - * irq domain controller. That is, it handles the mapping between hardware - * and virtual interrupt numbers for a given interrupt domain. The domain - * structure is generally created by the PIC code for a given PIC instance - * (though a domain can cover more than one PIC if they have a flat number - * model). It's the domain callbacks that are responsible for setting the - * irq_chip on a given irq_desc after it's been mapped. - * - * The host code and data structures use a fwnode_handle pointer to - * identify the domain. In some cases, and in order to preserve source - * code compatibility, this fwnode pointer is "upgraded" to a DT - * device_node. For those firmware infrastructures that do not provide - * a unique identifier for an interrupt controller, the irq_domain - * code offers a fwnode allocator. + * See Documentation/core-api/irq/irq-domain.rst for the details. */ #ifndef _LINUX_IRQDOMAIN_H @@ -61,9 +39,9 @@ struct msi_parent_ops; * pass a device-specific description of an interrupt. */ struct irq_fwspec { - struct fwnode_handle *fwnode; - int param_count; - u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; + struct fwnode_handle *fwnode; + int param_count; + u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; /* Conversion function from of_phandle_args fields to fwspec */ @@ -72,26 +50,26 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, /** * struct irq_domain_ops - Methods for irq_domain objects - * @match: Match an interrupt controller device node to a host, returns - * 1 on a match - * @select: Match an interrupt controller fw specification. It is more generic - * than @match as it receives a complete struct irq_fwspec. Therefore, - * @select is preferred if provided. Returns 1 on a match. - * @map: Create or update a mapping between a virtual irq number and a hw - * irq number. This is called only once for a given mapping. - * @unmap: Dispose of such a mapping - * @xlate: Given a device tree node and interrupt specifier, decode - * the hardware irq number and linux irq type value. - * @alloc: Allocate @nr_irqs interrupts starting from @virq. - * @free: Free @nr_irqs interrupts starting from @virq. - * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only - * reserve the vector. If unset, assign the vector (called from - * request_irq()). - * @deactivate: Disarm one interrupt (@irqd). - * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and - * linux irq type value (@out_type). This is a generalised @xlate - * (over struct irq_fwspec) and is preferred if provided. - * @debug_show: For domains to show specific data for an interrupt in debugfs. + * @match: Match an interrupt controller device node to a domain, returns + * 1 on a match + * @select: Match an interrupt controller fw specification. It is more generic + * than @match as it receives a complete struct irq_fwspec. Therefore, + * @select is preferred if provided. Returns 1 on a match. + * @map: Create or update a mapping between a virtual irq number and a hw + * irq number. This is called only once for a given mapping. + * @unmap: Dispose of such a mapping + * @xlate: Given a device tree node and interrupt specifier, decode + * the hardware irq number and linux irq type value. + * @alloc: Allocate @nr_irqs interrupts starting from @virq. + * @free: Free @nr_irqs interrupts starting from @virq. + * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only + * reserve the vector. If unset, assign the vector (called from + * request_irq()). + * @deactivate: Disarm one interrupt (@irqd). + * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and + * linux irq type value (@out_type). This is a generalised @xlate + * (over struct irq_fwspec) and is preferred if provided. + * @debug_show: For domains to show specific data for an interrupt in debugfs. * * Functions below are provided by the driver and called whenever a new mapping * is created or an old mapping is disposed. The driver can then proceed to @@ -99,29 +77,29 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, * to setup the irq_desc when returning from map(). */ struct irq_domain_ops { - int (*match)(struct irq_domain *d, struct device_node *node, - enum irq_domain_bus_token bus_token); - int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token); - int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); - void (*unmap)(struct irq_domain *d, unsigned int virq); - int (*xlate)(struct irq_domain *d, struct device_node *node, - const u32 *intspec, unsigned int intsize, - unsigned long *out_hwirq, unsigned int *out_type); + int (*match)(struct irq_domain *d, struct device_node *node, + enum irq_domain_bus_token bus_token); + int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); + int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); + void (*unmap)(struct irq_domain *d, unsigned int virq); + int (*xlate)(struct irq_domain *d, struct device_node *node, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* extended V2 interfaces to support hierarchy irq_domains */ - int (*alloc)(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs, void *arg); - void (*free)(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); - void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); - int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, - unsigned long *out_hwirq, unsigned int *out_type); + int (*alloc)(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *arg); + void (*free)(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); + void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); + int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS - void (*debug_show)(struct seq_file *m, struct irq_domain *d, - struct irq_data *irqd, int ind); + void (*debug_show)(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind); #endif }; @@ -231,6 +209,9 @@ enum { /* Irq domain must destroy generic chips when removed */ IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10), + /* Address and data pair is mutable when irq_set_affinity() */ + IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -244,8 +225,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) return to_of_node(d->fwnode); } -static inline void irq_domain_set_pm_device(struct irq_domain *d, - struct device *dev) +static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) d->pm_dev = dev; @@ -261,14 +241,12 @@ enum { IRQCHIP_FWNODE_NAMED_ID, }; -static inline -struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) +static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); } -static inline -struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) +static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, NULL); @@ -281,6 +259,8 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) void irq_domain_free_fwnode(struct fwnode_handle *fwnode); +DEFINE_FREE(irq_domain_free_fwnode, struct fwnode_handle *, if (_T) irq_domain_free_fwnode(_T)) + struct irq_domain_chip_generic_info; /** @@ -333,35 +313,18 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); struct irq_domain *devm_irq_domain_instantiate(struct device *dev, const struct irq_domain_info *info); -struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, - unsigned int size, +struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data); -struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - irq_hw_number_t first_hwirq, - const struct irq_domain_ops *ops, - void *host_data); -struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, - unsigned int size, - unsigned int first_irq, - irq_hw_number_t first_hwirq, - const struct irq_domain_ops *ops, - void *host_data); -extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token); -extern void irq_set_default_host(struct irq_domain *host); -extern struct irq_domain *irq_get_default_host(void); -extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, - irq_hw_number_t hwirq, int node, - const struct irq_affinity_desc *affinity); - -static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) -{ - return node ? &node->fwnode : NULL; -} + const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, + unsigned int first_irq, irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); +void irq_set_default_domain(struct irq_domain *domain); +struct irq_domain *irq_get_default_domain(void); +int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, + const struct irq_affinity_desc *affinity); extern const struct fwnode_operations irqchip_fwnode_ops; @@ -370,12 +333,10 @@ static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) return fwnode && fwnode->ops == &irqchip_fwnode_ops; } -extern void irq_domain_update_bus_token(struct irq_domain *domain, - enum irq_domain_bus_token bus_token); +void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token); -static inline -struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, - enum irq_domain_bus_token bus_token) +static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { struct irq_fwspec fwspec = { .fwnode = fwnode, @@ -387,7 +348,7 @@ struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { - return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token); + return irq_find_matching_fwnode(of_fwnode_handle(node), bus_token); } static inline struct irq_domain *irq_find_host(struct device_node *node) @@ -401,136 +362,107 @@ static inline struct irq_domain *irq_find_host(struct device_node *node) return d; } -static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data) -{ - return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data); -} - -/** - * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. - * @of_node: pointer to interrupt controller's device tree node. - * @size: Number of interrupts in the domain. - * @ops: map/unmap domain callbacks - * @host_data: Controller private data pointer - */ -static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), - .size = size, - .hwirq_max = size, - .ops = ops, - .host_data = host_data, - }; - struct irq_domain *d; - - d = irq_domain_instantiate(&info); - return IS_ERR(d) ? NULL : d; -} - #ifdef CONFIG_IRQ_DOMAIN_NOMAP -static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, - unsigned int max_irq, - const struct irq_domain_ops *ops, - void *host_data) +static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode, + unsigned int max_irq, + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), + const struct irq_domain_info info = { + .fwnode = fwnode, .hwirq_max = max_irq, .direct_max = max_irq, .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } -extern unsigned int irq_create_direct_mapping(struct irq_domain *host); +unsigned int irq_create_direct_mapping(struct irq_domain *domain); #endif -static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), - .hwirq_max = ~0U, - .ops = ops, - .host_data = host_data, - }; - struct irq_domain *d; - - d = irq_domain_instantiate(&info); - return IS_ERR(d) ? NULL : d; -} - +/** + * irq_domain_create_linear - Allocate and register a linear revmap irq_domain. + * @fwnode: pointer to interrupt controller's FW node. + * @size: Number of interrupts in the domain. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + * + * Returns: Newly created irq_domain + */ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .size = size, .hwirq_max = size, .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, - void *host_data) + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .hwirq_max = ~0, .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } -extern void irq_domain_remove(struct irq_domain *host); +void irq_domain_remove(struct irq_domain *domain); -extern int irq_domain_associate(struct irq_domain *domain, unsigned int irq, - irq_hw_number_t hwirq); -extern void irq_domain_associate_many(struct irq_domain *domain, - unsigned int irq_base, - irq_hw_number_t hwirq_base, int count); +int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq); +void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, + irq_hw_number_t hwirq_base, int count); -extern unsigned int irq_create_mapping_affinity(struct irq_domain *host, - irq_hw_number_t hwirq, - const struct irq_affinity_desc *affinity); -extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); -extern void irq_dispose_mapping(unsigned int virq); +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq, + const struct irq_affinity_desc *affinity); +unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); +void irq_dispose_mapping(unsigned int virq); -static inline unsigned int irq_create_mapping(struct irq_domain *host, - irq_hw_number_t hwirq) +/** + * irq_create_mapping - Map a hardware interrupt into linux irq space + * @domain: domain owning this hardware interrupt or NULL for default domain + * @hwirq: hardware irq number in that domain space + * + * Only one mapping per hardware interrupt is permitted. + * + * If the sense/trigger is to be specified, set_irq_type() should be called + * on the number returned from that call. + * + * Returns: Linux irq number or 0 on error + */ +static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - return irq_create_mapping_affinity(host, hwirq, NULL); + return irq_create_mapping_affinity(domain, hwirq, NULL); } -extern struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, - irq_hw_number_t hwirq, - unsigned int *irq); +struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, + irq_hw_number_t hwirq, + unsigned int *irq); +/** + * irq_resolve_mapping - Find a linux irq from a hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * Returns: Interrupt descriptor + */ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { @@ -539,8 +471,10 @@ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, /** * irq_find_mapping() - Find a linux irq from a hw irq number. - * @domain: domain owning this hardware interrupt - * @hwirq: hardware irq number in that domain space + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * Returns: Linux irq number or 0 if not found */ static inline unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) @@ -553,108 +487,115 @@ static inline unsigned int irq_find_mapping(struct irq_domain *domain, return 0; } -static inline unsigned int irq_linear_revmap(struct irq_domain *domain, - irq_hw_number_t hwirq) -{ - return irq_find_mapping(domain, hwirq); -} - extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); - -int irq_domain_translate_twocell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); - -int irq_domain_translate_onecell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); +int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); + +int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); /* IPI functions */ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. */ -extern struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, - unsigned int virq); -extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, - void *chip_data, irq_flow_handler_t handler, - void *handler_data, const char *handler_name); -extern void irq_domain_reset_irq_data(struct irq_data *irq_data); +struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); +void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, + void *handler_data, const char *handler_name); +void irq_domain_reset_irq_data(struct irq_data *irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -extern struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, - unsigned int flags, unsigned int size, - struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, void *host_data); - -static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, - unsigned int flags, - unsigned int size, - struct device_node *node, - const struct irq_domain_ops *ops, - void *host_data) -{ - return irq_domain_create_hierarchy(parent, flags, size, - of_node_to_fwnode(node), - ops, host_data); -} - -extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, - unsigned int nr_irqs, int node, void *arg, - bool realloc, - const struct irq_affinity_desc *affinity); -extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); -extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early); -extern void irq_domain_deactivate_irq(struct irq_data *irq_data); - -static inline int irq_domain_alloc_irqs(struct irq_domain *domain, - unsigned int nr_irqs, int node, void *arg) -{ - return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, - NULL); -} - -extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, - unsigned int irq_base, - unsigned int nr_irqs, void *arg); -extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, - unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, - void *chip_data); -extern void irq_domain_free_irqs_common(struct irq_domain *domain, - unsigned int virq, - unsigned int nr_irqs); -extern void irq_domain_free_irqs_top(struct irq_domain *domain, - unsigned int virq, unsigned int nr_irqs); - -extern int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); -extern int irq_domain_pop_irq(struct irq_domain *domain, int virq); - -extern int irq_domain_alloc_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, - unsigned int nr_irqs, void *arg); - -extern void irq_domain_free_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, - unsigned int nr_irqs); - -extern int irq_domain_disconnect_hierarchy(struct irq_domain *domain, - unsigned int virq); +/** + * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy + * @parent: Parent irq domain to associate with the new domain + * @flags: Irq domain flags associated to the domain + * @size: Size of the domain. See below + * @fwnode: Optional fwnode of the interrupt controller + * @ops: Pointer to the interrupt domain callbacks + * @host_data: Controller private data pointer + * + * If @size is 0 a tree domain is created, otherwise a linear domain. + * + * If successful the parent is associated to the new domain and the + * domain flags are set. + * + * Returns: A pointer to IRQ domain, or %NULL on failure. + */ +static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, + unsigned int flags, unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) +{ + const struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size ? : ~0U, + .ops = ops, + .host_data = host_data, + .domain_flags = flags, + .parent = parent, + }; + struct irq_domain *d = irq_domain_instantiate(&info); + + return IS_ERR(d) ? NULL : d; +} + +int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, + int node, void *arg, bool realloc, + const struct irq_affinity_desc *affinity); +void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); +int irq_domain_activate_irq(struct irq_data *irq_data, bool early); +void irq_domain_deactivate_irq(struct irq_data *irq_data); + +/** + * irq_domain_alloc_irqs - Allocate IRQs from domain + * @domain: domain to allocate from + * @nr_irqs: number of IRQs to allocate + * @node: NUMA node id for memory allocation + * @arg: domain specific argument + * + * See __irq_domain_alloc_irqs()' documentation. + */ +static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, + int node, void *arg) +{ + return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, NULL); +} + +int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, const struct irq_chip *chip, + void *chip_data); +void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs); +void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); + +int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); +int irq_domain_pop_irq(struct irq_domain *domain, int virq); + +int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, + unsigned int nr_irqs, void *arg); + +void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, + unsigned int nr_irqs); + +int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq); static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { @@ -663,8 +604,7 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) static inline bool irq_domain_is_ipi(struct irq_domain *domain) { - return domain->flags & - (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); + return domain->flags & (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); } static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) @@ -692,15 +632,18 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; } +static inline bool irq_domain_is_msi_immutable(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_IMMUTABLE; +} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -static inline int irq_domain_alloc_irqs(struct irq_domain *domain, - unsigned int nr_irqs, int node, void *arg) +static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, + int node, void *arg) { return -1; } -static inline void irq_domain_free_irqs(unsigned int virq, - unsigned int nr_irqs) { } +static inline void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { } static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { @@ -740,8 +683,7 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_MSI_IRQ -int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, - unsigned int type); +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type); void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); #else static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, @@ -756,10 +698,50 @@ static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsig } #endif +/* Deprecated functions. Will be removed in the merge window */ +static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) +{ + return node ? &node->fwnode : NULL; +} + +static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(of_node), + .hwirq_max = ~0U, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; +} + +static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(of_node), + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; +} + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } -static inline struct irq_domain *irq_find_matching_fwnode( - struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) +static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { return NULL; } diff --git a/include/linux/ism.h b/include/linux/ism.h index 5428edd90982..8358b4cd7ba6 100644 --- a/include/linux/ism.h +++ b/include/linux/ism.h @@ -28,6 +28,7 @@ struct ism_dmb { struct ism_dev { spinlock_t lock; /* protects the ism device */ + spinlock_t cmd_lock; /* serializes cmds */ struct list_head list; struct pci_dev *pdev; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 50f7ea8714bf..43b9297fe8a7 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -28,7 +28,7 @@ #include <linux/slab.h> #include <linux/bit_spinlock.h> #include <linux/blkdev.h> -#include <crypto/hash.h> +#include <linux/crc32c.h> #endif #define journal_oom_retry 1 @@ -459,7 +459,6 @@ struct jbd2_revoke_table_s; * @h_ref: Reference count on this handle. * @h_err: Field for caller's use to track errors through large fs operations. * @h_sync: Flag for sync-on-close. - * @h_jdata: Flag to force data journaling. * @h_reserved: Flag for handle for reserved credits. * @h_aborted: Flag indicating fatal error on handle. * @h_type: For handle statistics. @@ -491,7 +490,6 @@ struct jbd2_journal_handle /* Flags [no locking] */ unsigned int h_sync: 1; - unsigned int h_jdata: 1; unsigned int h_reserved: 1; unsigned int h_aborted: 1; unsigned int h_type: 8; @@ -700,12 +698,6 @@ struct transaction_s /* Disk flush needs to be sent to fs partition [no locking] */ int t_need_data_flush; - - /* - * For use by the filesystem to store fs-specific data - * structures associated with the transaction - */ - struct list_head t_private_list; }; struct transaction_run_stats_s { @@ -1242,13 +1234,6 @@ struct journal_s void *j_private; /** - * @j_chksum_driver: - * - * Reference to checksum algorithm driver via cryptoapi. - */ - struct crypto_shash *j_chksum_driver; - - /** * @j_csum_seed: * * Precomputed journal UUID checksum for seeding other checksums. @@ -1395,9 +1380,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) #define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ #define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ #define JBD2_BARRIER 0x020 /* Use IDE barriers */ -#define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file - * data write error in ordered - * mode */ #define JBD2_CYCLE_RECORD 0x080 /* Journal cycled record log on * clean and empty filesystem * logging area */ @@ -1414,7 +1396,6 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) */ /* Filing buffers */ -extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); @@ -1634,10 +1615,12 @@ extern void jbd2_journal_destroy_revoke_record_cache(void); extern void jbd2_journal_destroy_revoke_table_cache(void); extern int __init jbd2_journal_init_revoke_record_cache(void); extern int __init jbd2_journal_init_revoke_table_cache(void); +struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size); +void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table); extern void jbd2_journal_destroy_revoke(journal_t *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); -extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); +extern void jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); extern void jbd2_journal_write_revoke_records(transaction_t *transaction, struct list_head *log_bufs); @@ -1740,20 +1723,13 @@ static inline int tid_geq(tid_t x, tid_t y) return (difference >= 0); } -extern int jbd2_journal_blocks_per_page(struct inode *inode); +extern int jbd2_journal_blocks_per_folio(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); -static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) -{ - return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); -} - static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && - journal->j_chksum_driver == NULL); - - return journal->j_chksum_driver != NULL; + return jbd2_has_feature_csum2(journal) || + jbd2_has_feature_csum3(journal); } static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) @@ -1790,27 +1766,9 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -/* JBD uses a CRC32 checksum */ -#define JBD_MAX_CHECKSUM_SIZE 4 - -static inline u32 jbd2_chksum(journal_t *journal, u32 crc, - const void *address, unsigned int length) +static inline u32 jbd2_chksum(u32 crc, const void *address, unsigned int length) { - DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx, - DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE, - sizeof(*((struct shash_desc *)0)->__ctx))); - int err; - - BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > - JBD_MAX_CHECKSUM_SIZE); - - desc->tfm = journal->j_chksum_driver; - *(u32 *)desc->__ctx = crc; - - err = crypto_shash_update(desc, address, length); - BUG_ON(err); - - return *(u32 *)desc->__ctx; + return crc32c(crc, address, length); } /* Return most recent uncommitted transaction */ diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 0ea8c9887429..91b20788273d 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -59,7 +59,7 @@ /* LATCH is used in the interval timer and ftape setup. */ #define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ -extern int register_refined_jiffies(long clock_tick_rate); +extern void register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f5a2727ca4a9..fdb79dd1ebd8 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -75,6 +75,7 @@ #include <linux/types.h> #include <linux/compiler.h> +#include <linux/cleanup.h> extern bool static_key_initialized; @@ -347,6 +348,8 @@ static inline void static_key_disable(struct static_key *key) #endif /* CONFIG_JUMP_LABEL */ +DEFINE_LOCK_GUARD_0(jump_label_lock, jump_label_lock(), jump_label_unlock()) + #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 1c6a6c1704d8..d5dd54c53ace 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -55,12 +55,11 @@ static inline void *dereference_symbol_descriptor(void *ptr) if (is_ksym_addr((unsigned long)ptr)) return ptr; - preempt_disable(); + guard(rcu)(); mod = __module_address((unsigned long)ptr); if (mod) ptr = dereference_module_function_descriptor(mod, ptr); - preempt_enable(); #endif return ptr; } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 6bbfc8aa42e8..890011071f2b 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -153,7 +153,7 @@ static __always_inline void kasan_unpoison_new_object(struct kmem_cache *cache, void __kasan_poison_new_object(struct kmem_cache *cache, void *object); /** - * kasan_unpoison_new_object - Repoison a new slab object. + * kasan_poison_new_object - Repoison a new slab object. * @cache: Cache the object belong to. * @object: Pointer to the object. * @@ -491,7 +491,6 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); -void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ @@ -509,7 +508,6 @@ static inline void kasan_cache_create(struct kmem_cache *cache, static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} -static inline void kasan_record_aux_stack_noalloc(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ diff --git a/include/linux/kcore.h b/include/linux/kcore.h index 86c0f1d18998..9a2fa013c91d 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -20,19 +20,6 @@ struct kcore_list { int type; }; -struct vmcore { - struct list_head list; - unsigned long long paddr; - unsigned long long size; - loff_t offset; -}; - -struct vmcoredd_node { - struct list_head list; /* List of dumps */ - void *buf; /* Buffer containing device's dump */ - unsigned int size; /* Size of the buffer */ -}; - #ifdef CONFIG_PROC_KCORE void __init kclist_add(struct kcore_list *, void *, size_t, int type); diff --git a/include/linux/kdb.h b/include/linux/kdb.h index f6c2ddb16b95..ecbf819deeca 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -104,7 +104,7 @@ extern int kdb_initial_cpu; #define KDB_NOENVVALUE (-6) #define KDB_NOTIMP (-7) #define KDB_ENVFULL (-8) -#define KDB_ENVBUFFULL (-9) +#define KDB_KMALLOCFAILED (-9) #define KDB_TOOMANYBPT (-10) #define KDB_TOOMANYDBREGS (-11) #define KDB_DUPBPT (-12) @@ -140,9 +140,6 @@ extern const char *kdb_diemsg; extern unsigned int kdb_flags; /* Global flags, see kdb_state for per cpu state */ -extern void kdb_save_flags(void); -extern void kdb_restore_flags(void); - #define KDB_FLAG(flag) (kdb_flags & KDB_FLAG_##flag) #define KDB_FLAG_SET(flag) ((void)(kdb_flags |= KDB_FLAG_##flag)) #define KDB_FLAG_CLEAR(flag) ((void)(kdb_flags &= ~KDB_FLAG_##flag)) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index be2e8c0a187e..1cce1f6410a9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,6 +33,7 @@ #include <linux/sprintf.h> #include <linux/static_call_types.h> #include <linux/instruction_pointer.h> +#include <linux/util_macros.h> #include <linux/wordpart.h> #include <asm/byteorder.h> @@ -41,19 +42,6 @@ #define STACK_MAGIC 0xdeadbeef -/* generic data direction definitions */ -#define READ 0 -#define WRITE 1 - -#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) - -#define u64_to_user_ptr(x) ( \ -{ \ - typecheck(u64, (x)); \ - (void __user *)(uintptr_t)(x); \ -} \ -) - struct completion; struct user; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 87c79d076d6d..b5a5f32fdfd1 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -147,6 +147,11 @@ enum kernfs_root_flag { * Support user xattrs to be written to nodes rooted at this root. */ KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, + + /* + * Renames must not change the parent node. + */ + KERNFS_ROOT_INVARIANT_PARENT = 0x0010, }; /* type-specific structures for kernfs_node union members */ @@ -199,8 +204,8 @@ struct kernfs_node { * never moved to a different parent, it is safe to access the * parent directly. */ - struct kernfs_node *parent; - const char *name; + struct kernfs_node __rcu *__parent; + const char __rcu *name; struct rb_node rb; @@ -395,7 +400,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) } int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); -int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn, +int kernfs_path_from_node(struct kernfs_node *kn_to, struct kernfs_node *kn_from, char *buf, size_t buflen); void pr_cont_kernfs_name(struct kernfs_node *kn); void pr_cont_kernfs_path(struct kernfs_node *kn); @@ -416,6 +421,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, void *priv); void kernfs_destroy_root(struct kernfs_root *root); +unsigned int kernfs_root_flags(struct kernfs_node *kn); struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, @@ -514,6 +520,8 @@ kernfs_create_root(struct kernfs_syscall_ops *scops, unsigned int flags, { return ERR_PTR(-ENOSYS); } static inline void kernfs_destroy_root(struct kernfs_root *root) { } +static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) +{ return 0; } static inline struct kernfs_node * kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f0e9f8eda7a3..03f85ad03025 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -25,6 +25,10 @@ extern note_buf_t __percpu *crash_notes; +#ifdef CONFIG_CRASH_DUMP +#include <linux/prandom.h> +#endif + #ifdef CONFIG_KEXEC_CORE #include <linux/list.h> #include <linux/compat.h> @@ -68,8 +72,6 @@ extern note_buf_t __percpu *crash_notes; #define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE #endif -#define KEXEC_CORE_NOTE_NAME CRASH_CORE_NOTE_NAME - /* * This structure is used to hold the arguments that are used when loading * kernel binaries. @@ -171,6 +173,7 @@ int kexec_image_post_load_cleanup_default(struct kimage *image); * @buf_min: The buffer can't be placed below this address. * @buf_max: The buffer can't be placed above this address. * @top_down: Allocate from top of memory. + * @random: Place the buffer at a random position. */ struct kexec_buf { struct kimage *image; @@ -182,8 +185,33 @@ struct kexec_buf { unsigned long buf_min; unsigned long buf_max; bool top_down; +#ifdef CONFIG_CRASH_DUMP + bool random; +#endif }; + +#ifdef CONFIG_CRASH_DUMP +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{ + unsigned short i; + + if (kbuf->random) { + get_random_bytes(&i, sizeof(unsigned short)); + *temp_start = start + (end - start) / USHRT_MAX * i; + } +} +#else +static inline void kexec_random_range_start(unsigned long start, + unsigned long end, + struct kexec_buf *kbuf, + unsigned long *temp_start) +{} +#endif + int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf); int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, @@ -205,6 +233,15 @@ static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) } #endif +#ifndef arch_check_excluded_range +static inline int arch_check_excluded_range(struct kimage *image, + unsigned long start, + unsigned long end) +{ + return 0; +} +#endif + #ifdef CONFIG_KEXEC_SIG #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION int kexec_kernel_verify_pe_sig(const char *kernel, unsigned long kernel_len); @@ -362,12 +399,24 @@ struct kimage { phys_addr_t ima_buffer_addr; size_t ima_buffer_size; + + unsigned long ima_segment_index; + bool is_ima_segment_index_set; #endif + struct { + struct kexec_segment *scratch; + phys_addr_t fdt; + } kho; + /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_sz; unsigned long elf_load_addr; + + /* dm crypt keys buffer */ + unsigned long dm_crypt_keys_addr; + unsigned long dm_crypt_keys_sz; }; /* kexec interface functions */ @@ -467,13 +516,19 @@ extern bool kexec_file_dbg_print; #define kexec_dprintk(fmt, arg...) \ do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0) +extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size); +extern void kimage_unmap_segment(void *buffer); #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; +struct kimage; static inline void __crash_kexec(struct pt_regs *regs) { } static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } static inline int kexec_crash_loaded(void) { return 0; } +static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size) +{ return NULL; } +static inline void kimage_unmap_segment(void *buffer) { } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h new file mode 100644 index 000000000000..348844cffb13 --- /dev/null +++ b/include/linux/kexec_handover.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_KEXEC_HANDOVER_H +#define LINUX_KEXEC_HANDOVER_H + +#include <linux/types.h> +#include <linux/errno.h> + +struct kho_scratch { + phys_addr_t addr; + phys_addr_t size; +}; + +/* KHO Notifier index */ +enum kho_event { + KEXEC_KHO_FINALIZE = 0, + KEXEC_KHO_ABORT = 1, +}; + +struct folio; +struct notifier_block; + +#define DECLARE_KHOSER_PTR(name, type) \ + union { \ + phys_addr_t phys; \ + type ptr; \ + } name +#define KHOSER_STORE_PTR(dest, val) \ + ({ \ + typeof(val) v = val; \ + typecheck(typeof((dest).ptr), v); \ + (dest).phys = virt_to_phys(v); \ + }) +#define KHOSER_LOAD_PTR(src) \ + ({ \ + typeof(src) s = src; \ + (typeof((s).ptr))((s).phys ? phys_to_virt((s).phys) : NULL); \ + }) + +struct kho_serialization; + +#ifdef CONFIG_KEXEC_HANDOVER +bool kho_is_enabled(void); + +int kho_preserve_folio(struct folio *folio); +int kho_preserve_phys(phys_addr_t phys, size_t size); +struct folio *kho_restore_folio(phys_addr_t phys); +int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt); +int kho_retrieve_subtree(const char *name, phys_addr_t *phys); + +int register_kho_notifier(struct notifier_block *nb); +int unregister_kho_notifier(struct notifier_block *nb); + +void kho_memory_init(void); + +void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, phys_addr_t scratch_phys, + u64 scratch_len); +#else +static inline bool kho_is_enabled(void) +{ + return false; +} + +static inline int kho_preserve_folio(struct folio *folio) +{ + return -EOPNOTSUPP; +} + +static inline int kho_preserve_phys(phys_addr_t phys, size_t size) +{ + return -EOPNOTSUPP; +} + +static inline struct folio *kho_restore_folio(phys_addr_t phys) +{ + return NULL; +} + +static inline int kho_add_subtree(struct kho_serialization *ser, + const char *name, void *fdt) +{ + return -EOPNOTSUPP; +} + +static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys) +{ + return -EOPNOTSUPP; +} + +static inline int register_kho_notifier(struct notifier_block *nb) +{ + return -EOPNOTSUPP; +} + +static inline int unregister_kho_notifier(struct notifier_block *nb) +{ + return -EOPNOTSUPP; +} + +static inline void kho_memory_init(void) +{ +} + +static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len, + phys_addr_t scratch_phys, u64 scratch_len) +{ +} +#endif /* CONFIG_KEXEC_HANDOVER */ + +#endif /* LINUX_KEXEC_HANDOVER_H */ diff --git a/include/linux/key.h b/include/linux/key.h index 074dca3222b9..81b8f05c6898 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,6 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ +#define KEY_FLAG_USER_ALIVE 10 /* set if final put has not happened on key yet */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 76e891ee9e37..5eebbe7a3545 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -257,7 +257,6 @@ extern void kgdb_arch_late(void); * hardware breakpoints. * @correct_hw_break: Allow an architecture to specify how to correct the * hardware debug registers. - * @enable_nmi: Manage NMI-triggered entry to KGDB */ struct kgdb_arch { unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE]; @@ -270,8 +269,6 @@ struct kgdb_arch { void (*disable_hw_break)(struct pt_regs *regs); void (*remove_all_hw_break)(void); void (*correct_hw_break)(void); - - void (*enable_nmi)(bool on); }; /** @@ -306,16 +303,6 @@ extern const struct kgdb_arch arch_kgdb_ops; extern unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs); -#ifdef CONFIG_SERIAL_KGDB_NMI -extern int kgdb_register_nmi_console(void); -extern int kgdb_unregister_nmi_console(void); -extern bool kgdb_nmi_poll_knock(void); -#else -static inline int kgdb_register_nmi_console(void) { return 0; } -static inline int kgdb_unregister_nmi_console(void) { return 0; } -static inline bool kgdb_nmi_poll_knock(void) { return true; } -#endif - extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); extern struct kgdb_io *dbg_io_ops; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 1f46046080f5..b8d69cfbb58b 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -15,16 +15,8 @@ extern void khugepaged_enter_vma(struct vm_area_struct *vma, unsigned long vm_flags); extern void khugepaged_min_free_kbytes_update(void); extern bool current_is_khugepaged(void); -#ifdef CONFIG_SHMEM extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, bool install_pmd); -#else -static inline int collapse_pte_mapped_thp(struct mm_struct *mm, - unsigned long addr, bool install_pmd) -{ - return 0; -} -#endif static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) { diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 93a73c076d16..fbd424b2abb1 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -28,6 +28,7 @@ extern void kmemleak_update_trace(const void *ptr) __ref; extern void kmemleak_not_leak(const void *ptr) __ref; extern void kmemleak_transient_leak(const void *ptr) __ref; extern void kmemleak_ignore(const void *ptr) __ref; +extern void kmemleak_ignore_percpu(const void __percpu *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref; extern void kmemleak_no_scan(const void *ptr) __ref; extern void kmemleak_alloc_phys(phys_addr_t phys, size_t size, @@ -97,6 +98,9 @@ static inline void kmemleak_not_leak(const void *ptr) static inline void kmemleak_transient_leak(const void *ptr) { } +static inline void kmemleak_ignore_percpu(const void __percpu *ptr) +{ +} static inline void kmemleak_ignore(const void *ptr) { } diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index be707748e7ce..150fe2ae1b6b 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -52,8 +52,6 @@ const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); bool kobj_ns_current_may_mount(enum kobj_ns_type type); void *kobj_ns_grab_current(enum kobj_ns_type type); -const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk); -const void *kobj_ns_initial(enum kobj_ns_type type); void kobj_ns_drop(enum kobj_ns_type type, void *ns); #endif /* _LINUX_KOBJECT_NS_H */ diff --git a/include/linux/kref.h b/include/linux/kref.h index d32e21a2538c..88e82ab1367c 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -46,18 +46,18 @@ static inline void kref_get(struct kref *kref) } /** - * kref_put - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the + * kref_put - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. + * Decrement the refcount, and if 0, call @release. The caller may not + * pass NULL or kfree() as the release function. + * + * Return: 1 if this call removed the object, otherwise return 0. Beware, + * if this function returns 0, another caller may have removed the object + * by the time this function returns. The return value is only certain + * if you want to see if the object is definitely released. */ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { @@ -68,17 +68,37 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) return 0; } +/** + * kref_put_mutex - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @mutex: Mutex which protects the release function. + * + * This variant of kref_lock() calls the @release function with the @mutex + * held. The @release function will release the mutex. + */ static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), - struct mutex *lock) + struct mutex *mutex) { - if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { + if (refcount_dec_and_mutex_lock(&kref->refcount, mutex)) { release(kref); return 1; } return 0; } +/** + * kref_put_lock - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @lock: Spinlock which protects the release function. + * + * This variant of kref_lock() calls the @release function with the @lock + * held. The @release function will release the lock. + */ static inline int kref_put_lock(struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) @@ -94,8 +114,6 @@ static inline int kref_put_lock(struct kref *kref, * kref_get_unless_zero - Increment refcount for object unless it is zero. * @kref: object. * - * Return non-zero if the increment succeeded. Otherwise return 0. - * * This function is intended to simplify locking around refcounting for * objects that can be looked up from a lookup structure, and which are * removed from that lookup structure in the object destructor. @@ -105,6 +123,8 @@ static inline int kref_put_lock(struct kref *kref, * With a lookup followed by a kref_get_unless_zero *with return value check* * locking in the kref_put path can be deferred to the actual removal from * the lookup structure and RCU lookups become trivial. + * + * Return: non-zero if the increment succeeded. Otherwise return 0. */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 6a53ac4885bb..d73095b5cd96 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -93,6 +93,7 @@ void folio_migrate_ksm(struct folio *newfolio, struct folio *folio); void collect_procs_ksm(const struct folio *folio, const struct page *page, struct list_head *to_kill, int force_early); long ksm_process_profit(struct mm_struct *); +bool ksm_process_mergeable(struct mm_struct *mm); #else /* !CONFIG_KSM */ diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 7fcf29a4e0de..6ea897222af1 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -143,6 +143,7 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t */ extern unsigned long simple_strtoul(const char *,char **,unsigned int); +extern unsigned long simple_strntoul(const char *,char **,unsigned int,size_t); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); diff --git a/include/linux/kthread.h b/include/linux/kthread.h index b11f53c1ba2e..8d27403888ce 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -85,6 +85,7 @@ kthread_run_on_cpu(int (*threadfn)(void *data), void *data, void free_kthread_struct(struct task_struct *k); void kthread_bind(struct task_struct *k, unsigned int cpu); void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); +int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask); int kthread_stop(struct task_struct *k); int kthread_stop_put(struct task_struct *k); bool kthread_should_stop(void); @@ -186,13 +187,58 @@ extern void __kthread_init_worker(struct kthread_worker *worker, int kthread_worker_fn(void *worker_ptr); -__printf(2, 3) -struct kthread_worker * -kthread_create_worker(unsigned int flags, const char namefmt[], ...); +__printf(3, 4) +struct kthread_worker *kthread_create_worker_on_node(unsigned int flags, + int node, + const char namefmt[], ...); + +#define kthread_create_worker(flags, namefmt, ...) \ + kthread_create_worker_on_node(flags, NUMA_NO_NODE, namefmt, ## __VA_ARGS__); + +/** + * kthread_run_worker - create and wake a kthread worker. + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the thread. + * + * Description: Convenient wrapper for kthread_create_worker() followed by + * wake_up_process(). Returns the kthread_worker or ERR_PTR(-ENOMEM). + */ +#define kthread_run_worker(flags, namefmt, ...) \ +({ \ + struct kthread_worker *__kw \ + = kthread_create_worker(flags, namefmt, ## __VA_ARGS__); \ + if (!IS_ERR(__kw)) \ + wake_up_process(__kw->task); \ + __kw; \ +}) -__printf(3, 4) struct kthread_worker * +struct kthread_worker * kthread_create_worker_on_cpu(int cpu, unsigned int flags, - const char namefmt[], ...); + const char namefmt[]); + +/** + * kthread_run_worker_on_cpu - create and wake a cpu bound kthread worker. + * @cpu: CPU number + * @flags: flags modifying the default behavior of the worker + * @namefmt: printf-style name for the thread. Format is restricted + * to "name.*%u". Code fills in cpu number. + * + * Description: Convenient wrapper for kthread_create_worker_on_cpu() + * followed by wake_up_process(). Returns the kthread_worker or + * ERR_PTR(-ENOMEM). + */ +static inline struct kthread_worker * +kthread_run_worker_on_cpu(int cpu, unsigned int flags, + const char namefmt[]) +{ + struct kthread_worker *kw; + + kw = kthread_create_worker_on_cpu(cpu, flags, namefmt); + if (!IS_ERR(kw)) + wake_up_process(kw->task); + + return kw; +} bool kthread_queue_work(struct kthread_worker *worker, struct kthread_work *work); diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 3a4e723eae0f..383ed9985802 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -222,6 +222,11 @@ static inline ktime_t ns_to_ktime(u64 ns) return ns; } +static inline ktime_t us_to_ktime(u64 us) +{ + return us * NSEC_PER_USEC; +} + static inline ktime_t ms_to_ktime(u64 ms) { return ms * NSEC_PER_MSEC; diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h index 4862c98d80d3..da4d9b5f58f1 100644 --- a/include/linux/kvm_dirty_ring.h +++ b/include/linux/kvm_dirty_ring.h @@ -32,7 +32,7 @@ struct kvm_dirty_ring { * If CONFIG_HAVE_HVM_DIRTY_RING not defined, kvm_dirty_ring.o should * not be included as well, so define these nop functions for the arch. */ -static inline u32 kvm_dirty_ring_get_rsvd_entries(void) +static inline u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm) { return 0; } @@ -42,7 +42,7 @@ static inline bool kvm_use_dirty_bitmap(struct kvm *kvm) return true; } -static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, +static inline int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring, int index, u32 size) { return 0; @@ -71,11 +71,12 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring) #else /* CONFIG_HAVE_KVM_DIRTY_RING */ -int kvm_cpu_dirty_log_size(void); +int kvm_cpu_dirty_log_size(struct kvm *kvm); bool kvm_use_dirty_bitmap(struct kvm *kvm); bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm); -u32 kvm_dirty_ring_get_rsvd_entries(void); -int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size); +u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm); +int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring, + int index, u32 size); /* * called with kvm->slots_lock held, returns the number of diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b0b38744c4b0..3bde4fb5c6aa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -255,12 +255,19 @@ union kvm_mmu_notifier_arg { unsigned long attributes; }; +enum kvm_gfn_range_filter { + KVM_FILTER_SHARED = BIT(0), + KVM_FILTER_PRIVATE = BIT(1), +}; + struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; union kvm_mmu_notifier_arg arg; + enum kvm_gfn_range_filter attr_filter; bool may_block; + bool lockless; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); @@ -596,7 +603,12 @@ struct kvm_memory_slot { #ifdef CONFIG_KVM_PRIVATE_MEM struct { - struct file __rcu *file; + /* + * Writes protected by kvm->slots_lock. Acquiring a + * reference via kvm_gmem_get_file() is protected by + * either kvm->slots_lock or kvm->srcu. + */ + struct file *file; pgoff_t pgoff; } gmem; #endif @@ -979,9 +991,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) return xa_load(&kvm->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ - (atomic_read(&kvm->online_vcpus) - 1)) +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + if (atomic_read(&kvm->online_vcpus)) \ + xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { @@ -1002,6 +1015,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) void kvm_destroy_vcpus(struct kvm *kvm); +int kvm_trylock_all_vcpus(struct kvm *kvm); +int kvm_lock_all_vcpus(struct kvm *kvm); +void kvm_unlock_all_vcpus(struct kvm *kvm); + void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); @@ -1192,7 +1209,7 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn * -- just change its flags * * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): + * differentiation is the best we can do for kvm_set_memory_region(): */ enum kvm_mr_change { KVM_MR_CREATE, @@ -1201,10 +1218,8 @@ enum kvm_mr_change { KVM_MR_FLAGS_ONLY, }; -int kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region2 *mem); -int __kvm_set_memory_region(struct kvm *kvm, - const struct kvm_userspace_memory_region2 *mem); +int kvm_set_internal_memslot(struct kvm *kvm, + const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -1494,7 +1509,16 @@ bool kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); -void kvm_vcpu_kick(struct kvm_vcpu *vcpu); + +#ifndef CONFIG_S390 +void __kvm_vcpu_kick(struct kvm_vcpu *vcpu, bool wait); + +static inline void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + __kvm_vcpu_kick(vcpu, false); +} +#endif + int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); @@ -1599,13 +1623,13 @@ void kvm_arch_disable_virtualization(void); int kvm_arch_enable_virtualization_cpu(void); void kvm_arch_disable_virtualization_cpu(void); #endif +bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); -int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); void kvm_arch_create_vm_debugfs(struct kvm *kvm); @@ -1737,7 +1761,6 @@ static inline void kvm_unregister_perf_callbacks(void) {} int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); -void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); @@ -2243,6 +2266,14 @@ static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) __kvm_make_request(req, vcpu); } +#ifndef CONFIG_S390 +static inline void kvm_make_request_and_kick(int req, struct kvm_vcpu *vcpu) +{ + kvm_make_request(req, vcpu); + __kvm_vcpu_kick(vcpu, req & KVM_REQUEST_WAIT); +} +#endif + static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { return READ_ONCE(vcpu->requests); @@ -2275,6 +2306,7 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +extern bool enable_virt_at_load; extern bool kvm_rebooting; #endif @@ -2373,7 +2405,7 @@ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); -#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +#if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS) bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); @@ -2562,4 +2594,12 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range); #endif +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING +int kvm_enable_virtualization(void); +void kvm_disable_virtualization(void); +#else +static inline int kvm_enable_virtualization(void) { return 0; } +static inline void kvm_disable_virtualization(void) { } +#endif + #endif diff --git a/include/linux/lcd.h b/include/linux/lcd.h index c3ccdff4519a..d4fa03722b72 100644 --- a/include/linux/lcd.h +++ b/include/linux/lcd.h @@ -11,7 +11,6 @@ #include <linux/device.h> #include <linux/mutex.h> -#include <linux/notifier.h> #define LCD_POWER_ON (0) #define LCD_POWER_REDUCED (1) // deprecated; don't use in new code @@ -79,8 +78,11 @@ struct lcd_device { const struct lcd_ops *ops; /* Serialise access to set_power method */ struct mutex update_lock; - /* The framebuffer notifier block */ - struct notifier_block fb_notif; + + /** + * @entry: List entry of all registered lcd devices + */ + struct list_head entry; struct device dev; }; @@ -125,6 +127,19 @@ extern void lcd_device_unregister(struct lcd_device *ld); extern void devm_lcd_device_unregister(struct device *dev, struct lcd_device *ld); +#if IS_REACHABLE(CONFIG_LCD_CLASS_DEVICE) +void lcd_notify_blank_all(struct device *display_dev, int power); +void lcd_notify_mode_change_all(struct device *display_dev, + unsigned int width, unsigned int height); +#else +static inline void lcd_notify_blank_all(struct device *display_dev, int power) +{} + +static inline void lcd_notify_mode_change_all(struct device *display_dev, + unsigned int width, unsigned int height) +{} +#endif + #define to_lcd_device(obj) container_of(obj, struct lcd_device, dev) static inline void * lcd_get_data(struct lcd_device *ld_dev) diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h index 36df927ec4b7..21ec856c36bc 100644 --- a/include/linux/led-class-flash.h +++ b/include/linux/led-class-flash.h @@ -45,6 +45,8 @@ struct led_flash_ops { int (*timeout_set)(struct led_classdev_flash *fled_cdev, u32 timeout); /* get the flash LED fault */ int (*fault_get)(struct led_classdev_flash *fled_cdev, u32 *fault); + /* set flash duration */ + int (*duration_set)(struct led_classdev_flash *fled_cdev, u32 duration); }; /* @@ -75,6 +77,9 @@ struct led_classdev_flash { /* flash timeout value in microseconds along with its constraints */ struct led_flash_setting timeout; + /* flash timeout value in microseconds along with its constraints */ + struct led_flash_setting duration; + /* LED Flash class sysfs groups */ const struct attribute_group *sysfs_groups[LED_FLASH_SYSFS_GROUPS_SIZE]; }; @@ -209,4 +214,15 @@ int led_set_flash_timeout(struct led_classdev_flash *fled_cdev, u32 timeout); */ int led_get_flash_fault(struct led_classdev_flash *fled_cdev, u32 *fault); +/** + * led_set_flash_duration - set flash LED duration + * @fled_cdev: the flash LED to set + * @timeout: the flash duration to set it to + * + * Set the flash strobe duration. + * + * Returns: 0 on success or negative error value on failure + */ +int led_set_flash_duration(struct led_classdev_flash *fled_cdev, u32 duration); + #endif /* __LINUX_FLASH_LEDS_H_INCLUDED */ diff --git a/include/linux/leds.h b/include/linux/leds.h index 98f9719c924c..b3f0aa081064 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -640,6 +640,12 @@ static inline void ledtrig_flash_ctrl(bool on) {} static inline void ledtrig_torch_ctrl(bool on) {} #endif +#if IS_REACHABLE(CONFIG_LEDS_TRIGGER_BACKLIGHT) +void ledtrig_backlight_blank(bool blank); +#else +static inline void ledtrig_backlight_blank(bool blank) {} +#endif + /* * Generic LED platform data for describing LED names and default triggers. */ diff --git a/include/linux/libata.h b/include/linux/libata.h index c1a85d46eba6..e9d87cb1dbb7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -41,17 +41,6 @@ */ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */ - -#define ata_print_version_once(dev, version) \ -({ \ - static bool __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - ata_print_version(dev, version); \ - } \ -}) - /* defines only for the constants which don't work well as enums */ #define ATA_TAG_POISON 0xfafbfcfdU @@ -88,6 +77,7 @@ enum ata_quirks { __ATA_QUIRK_MAX_SEC_1024, /* Limit max sects to 1024 */ __ATA_QUIRK_MAX_TRIM_128M, /* Limit max trim size to 128M */ __ATA_QUIRK_NO_NCQ_ON_ATI, /* Disable NCQ on ATI chipset */ + __ATA_QUIRK_NO_LPM_ON_ATI, /* Disable LPM on ATI chipset */ __ATA_QUIRK_NO_ID_DEV_LOG, /* Identify device log missing */ __ATA_QUIRK_NO_LOG_DIR, /* Do not read log directory */ __ATA_QUIRK_NO_FUA, /* Do not use FUA */ @@ -432,6 +422,7 @@ enum { ATA_QUIRK_MAX_SEC_1024 = (1U << __ATA_QUIRK_MAX_SEC_1024), ATA_QUIRK_MAX_TRIM_128M = (1U << __ATA_QUIRK_MAX_TRIM_128M), ATA_QUIRK_NO_NCQ_ON_ATI = (1U << __ATA_QUIRK_NO_NCQ_ON_ATI), + ATA_QUIRK_NO_LPM_ON_ATI = (1U << __ATA_QUIRK_NO_LPM_ON_ATI), ATA_QUIRK_NO_ID_DEV_LOG = (1U << __ATA_QUIRK_NO_ID_DEV_LOG), ATA_QUIRK_NO_LOG_DIR = (1U << __ATA_QUIRK_NO_LOG_DIR), ATA_QUIRK_NO_FUA = (1U << __ATA_QUIRK_NO_FUA), @@ -543,6 +534,7 @@ typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes) extern struct device_attribute dev_attr_unload_heads; #ifdef CONFIG_SATA_HOST +extern struct device_attribute dev_attr_link_power_management_supported; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_ncq_prio_supported; extern struct device_attribute dev_attr_ncq_prio_enable; @@ -1199,10 +1191,9 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); -extern int ata_scsi_slave_alloc(struct scsi_device *sdev); -int ata_scsi_device_configure(struct scsi_device *sdev, - struct queue_limits *lim); -extern void ata_scsi_slave_destroy(struct scsi_device *sdev); +extern int ata_scsi_sdev_init(struct scsi_device *sdev); +int ata_scsi_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim); +extern void ata_scsi_sdev_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth); extern int ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev, @@ -1301,8 +1292,8 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_port_free(struct ata_port *ap); extern int ata_tport_add(struct device *parent, struct ata_port *ap); extern void ata_tport_delete(struct ata_port *ap); -int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, - struct ata_port *ap); +int ata_sas_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim, + struct ata_port *ap); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); @@ -1362,7 +1353,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); +int ata_acpi_cbl_pata_type(struct ata_port *ap); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { @@ -1387,10 +1378,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, return 0; } -static inline int ata_acpi_cbl_80wire(struct ata_port *ap, - const struct ata_acpi_gtm *gtm) +static inline int ata_acpi_cbl_pata_type(struct ata_port *ap) { - return 0; + return ATA_CBL_PATA40; } #endif @@ -1458,8 +1448,8 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ - .slave_alloc = ata_scsi_slave_alloc, \ - .slave_destroy = ata_scsi_slave_destroy, \ + .sdev_init = ata_scsi_sdev_init, \ + .sdev_destroy = ata_scsi_sdev_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ .max_sectors = ATA_MAX_SECTORS_LBA48 @@ -1467,14 +1457,14 @@ extern const struct attribute_group *ata_common_sdev_groups[]; #define ATA_SUBBASE_SHT(drv_name) \ __ATA_BASE_SHT(drv_name), \ .can_queue = ATA_DEF_QUEUE, \ - .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .device_configure = ata_scsi_device_configure + .tag_alloc_policy_rr = true, \ + .sdev_configure = ata_scsi_sdev_configure #define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \ __ATA_BASE_SHT(drv_name), \ .can_queue = drv_qd, \ - .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .device_configure = ata_scsi_device_configure + .tag_alloc_policy_rr = true, \ + .sdev_configure = ata_scsi_sdev_configure #define ATA_BASE_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ @@ -1592,7 +1582,11 @@ do { \ #define ata_dev_dbg(dev, fmt, ...) \ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__) -void ata_print_version(const struct device *dev, const char *version); +static inline void ata_print_version_once(const struct device *dev, + const char *version) +{ + dev_dbg_once(dev, "version %s\n", version); +} /* * ata_eh_info helpers @@ -1624,6 +1618,8 @@ static inline void ata_port_desc_misc(struct ata_port *ap, int irq) { ata_port_desc(ap, "irq %d", irq); ata_port_desc(ap, "lpm-pol %d", ap->target_lpm_policy); + if (ap->pflags & ATA_PFLAG_EXTERNAL) + ata_port_desc(ap, "ext"); } static inline bool ata_tag_internal(unsigned int tag) diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h index fc388da6a027..0d68f9d6a6a7 100644 --- a/include/linux/libgcc.h +++ b/include/linux/libgcc.h @@ -34,4 +34,8 @@ long long notrace __lshrdi3(long long u, word_type b); long long notrace __muldi3(long long u, long long v); word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b); +#ifdef CONFIG_HAVE_ARCH_LIBGCC_H +#include <asm/libgcc.h> +#endif + #endif /* __ASM_LIBGCC_H */ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5c8865bb59d9..b11660b706c5 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -134,10 +134,6 @@ .size name, .-name #endif -/* If symbol 'name' is treated as a subroutine (gets called, and returns) - * then please use ENDPROC to mark 'name' as STT_FUNC for the benefit of - * static analysis tools such as stack depth analyzer. - */ #ifndef ENDPROC /* deprecated, use SYM_FUNC_END */ #define ENDPROC(name) \ diff --git a/include/linux/list.h b/include/linux/list.h index 29a375889fb8..e7e28afd28f8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -50,9 +50,9 @@ static inline void INIT_LIST_HEAD(struct list_head *list) * Performs the full set of list corruption checks before __list_add(). * On list corruption reports a warning, and returns false. */ -extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, - struct list_head *prev, - struct list_head *next); +bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new, + struct list_head *prev, + struct list_head *next); /* * Performs list corruption checks before __list_add(). Returns false if a @@ -93,7 +93,7 @@ static __always_inline bool __list_add_valid(struct list_head *new, * Performs the full set of list corruption checks before __list_del_entry(). * On list corruption reports a warning, and returns false. */ -extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); +bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry); /* * Performs list corruption checks before __list_del_entry(). Returns false if a diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 05c166811f6b..fe739d35a864 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -91,13 +91,24 @@ void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren * @memcg: the cgroup of the sublist to add the item to. * * If the element is already part of a list, this function returns doing - * nothing. Therefore the caller does not need to keep state about whether or - * not the element already belongs in the list and is allowed to lazy update - * it. Note however that this is valid for *a* list, not *this* list. If - * the caller organize itself in a way that elements can be in more than - * one type of list, it is up to the caller to fully remove the item from - * the previous list (with list_lru_del() for instance) before moving it - * to @lru. + * nothing. This means that it is not necessary to keep state about whether or + * not the element already belongs in the list. That said, this logic only + * works if the item is in *this* list. If the item might be in some other + * list, then you cannot rely on this check and you must remove it from the + * other list before trying to insert it. + * + * The lru list consists of many sublists internally; the @nid and @memcg + * parameters are used to determine which sublist to insert the item into. + * It's important to use the right value of @nid and @memcg when deleting the + * item, since it might otherwise get deleted from the wrong sublist. + * + * This also applies when attempting to insert the item multiple times - if + * the item is currently in one sublist and you call list_lru_add() again, you + * must pass the right @nid and @memcg parameters so that the same sublist is + * used. + * + * You must ensure that the memcg is not freed during this call (e.g., with + * rcu or by taking a css refcnt). * * Return: true if the list was updated, false otherwise */ @@ -113,7 +124,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, * memcg of the sublist is determined by @item list_head. This assumption is * valid for slab objects LRU such as dentries, inodes, etc. * - * Return value: true if the list was updated, false otherwise + * Return: true if the list was updated, false otherwise */ bool list_lru_add_obj(struct list_lru *lru, struct list_head *item); @@ -125,8 +136,19 @@ bool list_lru_add_obj(struct list_lru *lru, struct list_head *item); * @memcg: the cgroup of the sublist to delete the item from. * * This function works analogously as list_lru_add() in terms of list - * manipulation. The comments about an element already pertaining to - * a list are also valid for list_lru_del(). + * manipulation. + * + * The comments in list_lru_add() about an element already being in a list are + * also valid for list_lru_del(), that is, you can delete an item that has + * already been removed or never been added. However, if the item is in a + * list, it must be in *this* list, and you must pass the right value of @nid + * and @memcg so that the right sublist is used. + * + * You must ensure that the memcg is not freed during this call (e.g., with + * rcu or by taking a css refcnt). When a memcg is deleted, list_lru entries + * are automatically moved to the parent memcg. This is done in a race-free + * way, so during deletion of an memcg both the old and new memcg will resolve + * to the same sublist internally. * * Return: true if the list was updated, false otherwise */ @@ -142,7 +164,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, * memcg of the sublist is determined by @item list_head. This assumption is * valid for slab objects LRU such as dentries, inodes, etc. * - * Return value: true if the list was updated, false otherwise. + * Return: true if the list was updated, false otherwise. */ bool list_lru_del_obj(struct list_lru *lru, struct list_head *item); diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index fa6e8471bd22..248db9b77ee2 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -28,6 +28,7 @@ struct hlist_nulls_node { #define NULLS_MARKER(value) (1UL | (((long)value) << 1)) #define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) +#define HLIST_NULLS_HEAD_INIT(nulls) {.first = (struct hlist_nulls_node *)NULLS_MARKER(nulls)} #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h index 013794fb5da0..065c185f2763 100644 --- a/include/linux/livepatch_sched.h +++ b/include/linux/livepatch_sched.h @@ -3,27 +3,23 @@ #define _LINUX_LIVEPATCH_SCHED_H_ #include <linux/jump_label.h> -#include <linux/static_call_types.h> +#include <linux/sched.h> #ifdef CONFIG_LIVEPATCH void __klp_sched_try_switch(void); -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key); -static __always_inline void klp_sched_try_switch(void) +static __always_inline void klp_sched_try_switch(struct task_struct *curr) { - if (static_branch_unlikely(&klp_sched_try_switch_key)) + if (static_branch_unlikely(&klp_sched_try_switch_key) && + READ_ONCE(curr->__state) & TASK_FREEZABLE) __klp_sched_try_switch(); } -#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ - #else /* !CONFIG_LIVEPATCH */ -static inline void klp_sched_try_switch(void) {} -static inline void __klp_sched_try_switch(void) {} +static inline void klp_sched_try_switch(struct task_struct *curr) {} #endif /* CONFIG_LIVEPATCH */ #endif /* _LINUX_LIVEPATCH_SCHED_H_ */ diff --git a/include/linux/llist.h b/include/linux/llist.h index 2c982ff7475a..27b17f64bcee 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -223,9 +223,26 @@ static inline struct llist_node *llist_next(struct llist_node *node) return node->next; } -extern bool llist_add_batch(struct llist_node *new_first, - struct llist_node *new_last, - struct llist_head *head); +/** + * llist_add_batch - add several linked entries in batch + * @new_first: first entry in batch to be added + * @new_last: last entry in batch to be added + * @head: the head for your lock-less list + * + * Return whether list is empty before adding. + */ +static inline bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head) +{ + struct llist_node *first = READ_ONCE(head->first); + + do { + new_last->next = first; + } while (!try_cmpxchg(&head->first, &first, new_first)); + + return !first; +} static inline bool __llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 091dc0b6bdfb..16a2ee4f8310 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -51,6 +51,34 @@ #define local_unlock_irqrestore(lock, flags) \ __local_unlock_irqrestore(lock, flags) +/** + * local_lock_init - Runtime initialize a lock instance + */ +#define local_trylock_init(lock) __local_trylock_init(lock) + +/** + * local_trylock - Try to acquire a per CPU local lock + * @lock: The lock variable + * + * The function can be used in any context such as NMI or HARDIRQ. Due to + * locking constrains it will _always_ fail to acquire the lock in NMI or + * HARDIRQ context on PREEMPT_RT. + */ +#define local_trylock(lock) __local_trylock(lock) + +/** + * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable + * interrupts if acquired + * @lock: The lock variable + * @flags: Storage for interrupt flags + * + * The function can be used in any context such as NMI or HARDIRQ. Due to + * locking constrains it will _always_ fail to acquire the lock in NMI or + * HARDIRQ context on PREEMPT_RT. + */ +#define local_trylock_irqsave(lock, flags) \ + __local_trylock_irqsave(lock, flags) + DEFINE_GUARD(local_lock, local_lock_t __percpu*, local_lock(_T), local_unlock(_T)) diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 8dd71fbbb6d2..8d5ac16a9b17 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,6 +15,12 @@ typedef struct { #endif } local_lock_t; +/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ +typedef struct { + local_lock_t llock; + u8 acquired; +} local_trylock_t; + #ifdef CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ .dep_map = { \ @@ -24,6 +30,9 @@ typedef struct { }, \ .owner = NULL, +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -31,6 +40,13 @@ static inline void local_lock_acquire(local_lock_t *l) l->owner = current; } +static inline void local_trylock_acquire(local_lock_t *l) +{ + lock_map_acquire_try(&l->dep_map); + DEBUG_LOCKS_WARN_ON(l->owner); + l->owner = current; +} + static inline void local_lock_release(local_lock_t *l) { DEBUG_LOCKS_WARN_ON(l->owner != current); @@ -44,12 +60,15 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } +static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } +#define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ @@ -62,6 +81,8 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_trylock_init(lock) __local_lock_init(lock.llock) + #define __spinlock_nested_bh_init(lock) \ do { \ static struct lock_class_key __key; \ @@ -73,39 +94,105 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_lock_acquire(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 0); \ + WRITE_ONCE(tl->acquired, 1); \ + }), \ + __percpu local_lock_t *: (void)0); \ + local_lock_acquire(l); \ + } while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ + } while (0) + +#define __local_trylock(lock) \ + ({ \ + local_trylock_t *tl; \ + \ + preempt_disable(); \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ + preempt_enable(); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ + } \ + !!tl; \ + }) + +#define __local_trylock_irqsave(lock, flags) \ + ({ \ + local_trylock_t *tl; \ + \ + local_irq_save(flags); \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ + local_irq_restore(flags); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ + } \ + !!tl; \ + }) + +#define __local_lock_release(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + local_lock_release(l); \ + _Generic((lock), \ + __percpu local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 1); \ + WRITE_ONCE(tl->acquired, 0); \ + }), \ + __percpu local_lock_t *: (void)0); \ } while (0) #define __local_unlock(lock) \ do { \ - local_lock_release(this_cpu_ptr(lock)); \ + __local_lock_release(lock); \ preempt_enable(); \ } while (0) #define __local_unlock_irq(lock) \ do { \ - local_lock_release(this_cpu_ptr(lock)); \ + __local_lock_release(lock); \ local_irq_enable(); \ } while (0) #define __local_unlock_irqrestore(lock, flags) \ do { \ - local_lock_release(this_cpu_ptr(lock)); \ + __local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) @@ -125,14 +212,18 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; +typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) +#define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) +#define __local_trylock_init(l) __local_lock_init(l) + #define __local_lock(__lock) \ do { \ migrate_disable(); \ @@ -169,4 +260,26 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) +#define __local_trylock(lock) \ + ({ \ + int __locked; \ + \ + if (in_nmi() | in_hardirq()) { \ + __locked = 0; \ + } else { \ + migrate_disable(); \ + __locked = spin_trylock(this_cpu_ptr((lock))); \ + if (!__locked) \ + migrate_enable(); \ + } \ + __locked; \ + }) + +#define __local_trylock_irqsave(lock, flags) \ + ({ \ + typecheck(unsigned long, flags); \ + flags = 0; \ + __local_trylock(lock); \ + }) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/lockref.h b/include/linux/lockref.h index c3a1f78bc884..676721ee878d 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -34,14 +34,25 @@ struct lockref { }; }; -extern void lockref_get(struct lockref *); -extern int lockref_put_return(struct lockref *); -extern int lockref_get_not_zero(struct lockref *); -extern int lockref_put_not_zero(struct lockref *); -extern int lockref_put_or_lock(struct lockref *); - -extern void lockref_mark_dead(struct lockref *); -extern int lockref_get_not_dead(struct lockref *); +/** + * lockref_init - Initialize a lockref + * @lockref: pointer to lockref structure + * + * Initializes @lockref->count to 1. + */ +static inline void lockref_init(struct lockref *lockref) +{ + spin_lock_init(&lockref->lock); + lockref->count = 1; +} + +void lockref_get(struct lockref *lockref); +int lockref_put_return(struct lockref *lockref); +bool lockref_get_not_zero(struct lockref *lockref); +bool lockref_put_or_lock(struct lockref *lockref); + +void lockref_mark_dead(struct lockref *lockref); +bool lockref_get_not_dead(struct lockref *lockref); /* Must be called under spinlock for reliable results */ static inline bool __lockref_is_dead(const struct lockref *l) diff --git a/include/linux/log2.h b/include/linux/log2.h index 9f30d087a128..1366cb688a6d 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -41,7 +41,7 @@ int __ilog2_u64(u64 n) * *not* considered a power of two. * Return: true if @n is a power of 2, otherwise false. */ -static inline __attribute__((const)) +static __always_inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 97a8b21eb033..382c56a97bba 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -5,7 +5,7 @@ * * Author : Etienne BASSET <etienne.basset@ensta.org> * - * All credits to : Stephen Smalley, <sds@tycho.nsa.gov> + * All credits to : Stephen Smalley * All BUGS to : Etienne BASSET <etienne.basset@ensta.org> */ #ifndef _LSM_COMMON_LOGGING_ @@ -77,6 +77,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_LOCKDOWN 15 #define LSM_AUDIT_DATA_NOTIFICATION 16 #define LSM_AUDIT_DATA_ANONINODE 17 +#define LSM_AUDIT_DATA_NLMSGTYPE 18 union { struct path path; struct dentry *dentry; @@ -98,6 +99,7 @@ struct common_audit_data { struct lsm_ibendport_audit *ibendport; int reason; const char *anonclass; + u16 nlmsg_type; } u; /* this union contains LSM specific data */ union { @@ -116,14 +118,36 @@ struct common_audit_data { #define v4info fam.v4 #define v6info fam.v6 +#ifdef CONFIG_AUDIT + int ipv4_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto); +#if IS_ENABLED(CONFIG_IPV6) int ipv6_skb_to_auditdata(struct sk_buff *skb, struct common_audit_data *ad, u8 *proto); +#endif /* IS_ENABLED(CONFIG_IPV6) */ void common_lsm_audit(struct common_audit_data *a, void (*pre_audit)(struct audit_buffer *, void *), void (*post_audit)(struct audit_buffer *, void *)); +void audit_log_lsm_data(struct audit_buffer *ab, + const struct common_audit_data *a); + +#else /* CONFIG_AUDIT */ + +static inline void common_lsm_audit(struct common_audit_data *a, + void (*pre_audit)(struct audit_buffer *, void *), + void (*post_audit)(struct audit_buffer *, void *)) +{ +} + +static inline void audit_log_lsm_data(struct audit_buffer *ab, + const struct common_audit_data *a) +{ +} + +#endif /* CONFIG_AUDIT */ + #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index eb2937599cb0..bf3bbac4e02a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -83,7 +83,7 @@ LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, u32 *ctxlen) + struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) @@ -295,17 +295,16 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) -LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, - u32 *seclen) +LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, struct lsm_context *cp) LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) -LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) +LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, - u32 *ctxlen) +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, + struct lsm_context *cp) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, @@ -427,14 +426,14 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) #endif /* CONFIG_AUDIT */ #ifdef CONFIG_BPF_SYSCALL -LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size) +LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel) LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode) LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map) LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog) LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr, const struct path *path) @@ -446,7 +445,7 @@ LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap) LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) #ifdef CONFIG_PERF_EVENTS -LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) +LSM_HOOK(int, 0, perf_event_open, int type) LSM_HOOK(int, 0, perf_event_alloc, struct perf_event *event) LSM_HOOK(int, 0, perf_event_read, struct perf_event *event) LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) @@ -456,6 +455,7 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) +LSM_HOOK(int, 0, uring_allowed, void) #endif /* CONFIG_IO_URING */ LSM_HOOK(void, LSM_RET_VOID, initramfs_populated, void) diff --git a/include/linux/lz4.h b/include/linux/lz4.h index b16e15b9587a..ad6042a718b5 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -645,4 +645,10 @@ int LZ4_decompress_safe_usingDict(const char *source, char *dest, int LZ4_decompress_fast_usingDict(const char *source, char *dest, int originalSize, const char *dictStart, int dictSize); +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + #endif diff --git a/include/linux/lzo.h b/include/linux/lzo.h index e95c7d1092b2..4d30e3624acd 100644 --- a/include/linux/lzo.h +++ b/include/linux/lzo.h @@ -24,10 +24,18 @@ int lzo1x_1_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem); +/* Same as above but does not write more than dst_len to dst. */ +int lzo1x_1_compress_safe(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */ int lzorle1x_1_compress(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len, void *wrkmem); +/* Same as above but does not write more than dst_len to dst. */ +int lzorle1x_1_compress_safe(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* safe decompression with overrun testing */ int lzo1x_decompress_safe(const unsigned char *src, size_t src_len, unsigned char *dst, size_t *dst_len); diff --git a/include/linux/mailbox/exynos-message.h b/include/linux/mailbox/exynos-message.h new file mode 100644 index 000000000000..5a9ed5ce2046 --- /dev/null +++ b/include/linux/mailbox/exynos-message.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Exynos mailbox message. + * + * Copyright 2024 Linaro Ltd. + */ + +#ifndef _LINUX_EXYNOS_MESSAGE_H_ +#define _LINUX_EXYNOS_MESSAGE_H_ + +#define EXYNOS_MBOX_CHAN_TYPE_DOORBELL 0 +#define EXYNOS_MBOX_CHAN_TYPE_DATA 1 + +struct exynos_mbox_msg { + unsigned int chan_id; + unsigned int chan_type; +}; + +#endif /* _LINUX_EXYNOS_MESSAGE_H_ */ diff --git a/include/linux/mailbox/mchp-ipc.h b/include/linux/mailbox/mchp-ipc.h new file mode 100644 index 000000000000..f084ac9e291b --- /dev/null +++ b/include/linux/mailbox/mchp-ipc.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + *Copyright (c) 2024 Microchip Technology Inc. All rights reserved. + */ + +#ifndef _LINUX_MCHP_IPC_H_ +#define _LINUX_MCHP_IPC_H_ + +#include <linux/mailbox_controller.h> +#include <linux/types.h> + +struct mchp_ipc_msg { + u32 *buf; + u16 size; +}; + +struct mchp_ipc_sbi_chan { + void *buf_base_tx; + void *buf_base_rx; + void *msg_buf_tx; + void *msg_buf_rx; + phys_addr_t buf_base_tx_addr; + phys_addr_t buf_base_rx_addr; + phys_addr_t msg_buf_tx_addr; + phys_addr_t msg_buf_rx_addr; + int chan_aggregated_irq; + int mp_irq; + int mc_irq; + u32 id; + u32 max_msg_size; +}; + +#endif /* _LINUX_MCHP_IPC_H_ */ diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h index a8f0070c7aa9..4c1a91b07de3 100644 --- a/include/linux/mailbox/mtk-cmdq-mailbox.h +++ b/include/linux/mailbox/mtk-cmdq-mailbox.h @@ -75,7 +75,6 @@ struct cmdq_pkt { dma_addr_t pa_base; size_t cmd_buf_size; /* command occupied size */ size_t buf_size; /* real buffer size */ - void *cl; }; u8 cmdq_get_shift_pa(struct mbox_chan *chan); diff --git a/include/linux/mailbox_client.h b/include/linux/mailbox_client.h index 734694912ef7..c6eea9afb943 100644 --- a/include/linux/mailbox_client.h +++ b/include/linux/mailbox_client.h @@ -7,8 +7,8 @@ #ifndef __MAILBOX_CLIENT_H #define __MAILBOX_CLIENT_H -#include <linux/of.h> #include <linux/device.h> +#include <linux/of.h> struct mbox_chan; diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 6fee33cb52f5..ad01c4082358 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -3,11 +3,11 @@ #ifndef __MAILBOX_CONTROLLER_H #define __MAILBOX_CONTROLLER_H +#include <linux/completion.h> +#include <linux/device.h> +#include <linux/hrtimer.h> #include <linux/of.h> #include <linux/types.h> -#include <linux/hrtimer.h> -#include <linux/device.h> -#include <linux/completion.h> struct mbox_chan; @@ -134,7 +134,4 @@ void mbox_chan_txdone(struct mbox_chan *chan, int r); /* atomic */ int devm_mbox_controller_register(struct device *dev, struct mbox_controller *mbox); -void devm_mbox_controller_unregister(struct device *dev, - struct mbox_controller *mbox); - #endif /* __MAILBOX_CONTROLLER_H */ diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index cbbcd18d4186..9ef129038224 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -463,6 +463,8 @@ struct ma_wr_state { void __rcu **slots; /* mas->node->slots pointer */ void *entry; /* The entry to write */ void *content; /* The existing entry that is being overwritten */ + unsigned char vacant_height; /* Height of lowest node with free space */ + unsigned char sufficient_height;/* Height of lowest node with min sufficiency + 1 nodes */ }; #define mas_lock(mas) spin_lock(&((mas)->tree->ma_lock)) @@ -498,6 +500,8 @@ struct ma_wr_state { .mas = ma_state, \ .content = NULL, \ .entry = wr_entry, \ + .vacant_height = 0, \ + .sufficient_height = 0 \ } #define MA_TOPIARY(name, tree) \ diff --git a/include/linux/math.h b/include/linux/math.h index f5f18dc3616b..0198c92cbe3e 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -34,6 +34,18 @@ */ #define round_down(x, y) ((x) & ~__round_mask(x, y)) +/** + * DIV_ROUND_UP_POW2 - divide and round up + * @n: numerator + * @d: denominator (must be a power of 2) + * + * Divides @n by @d and rounds up to next multiple of @d (which must be a power + * of 2). Avoids integer overflows that may occur with __KERNEL_DIV_ROUND_UP(). + * Performance is roughly equivalent to __KERNEL_DIV_ROUND_UP(). + */ +#define DIV_ROUND_UP_POW2(n, d) \ + ((n) / (d) + !!((n) & ((d) - 1))) + #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_DOWN_ULL(ll, d) \ diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 3c3deac57894..e43ff9f980a4 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -45,10 +45,7 @@ struct mdio_device { unsigned int reset_deassert_delay; }; -static inline struct mdio_device *to_mdio_device(const struct device *dev) -{ - return container_of(dev, struct mdio_device, dev); -} +#define to_mdio_device(__dev) container_of_const(__dev, struct mdio_device, dev) /* struct mdio_driver_common: Common to all MDIO drivers */ struct mdio_driver_common { diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index b38a56a13f39..725fd7727422 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -97,8 +97,6 @@ ssize_t mei_cldev_send(struct mei_cl_device *cldev, const u8 *buf, ssize_t mei_cldev_send_timeout(struct mei_cl_device *cldev, const u8 *buf, size_t length, unsigned long timeout); ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -ssize_t mei_cldev_recv_nonblock(struct mei_cl_device *cldev, u8 *buf, - size_t length); ssize_t mei_cldev_recv_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, unsigned long timeout); ssize_t mei_cldev_send_vtag(struct mei_cl_device *cldev, const u8 *buf, @@ -107,8 +105,6 @@ ssize_t mei_cldev_send_vtag_timeout(struct mei_cl_device *cldev, const u8 *buf, size_t length, u8 vtag, unsigned long timeout); ssize_t mei_cldev_recv_vtag(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag); -ssize_t mei_cldev_recv_nonblock_vtag(struct mei_cl_device *cldev, u8 *buf, - size_t length, u8 *vtag); ssize_t mei_cldev_recv_vtag_timeout(struct mei_cl_device *cldev, u8 *buf, size_t length, u8 *vtag, unsigned long timeout); @@ -116,7 +112,6 @@ int mei_cldev_register_rx_cb(struct mei_cl_device *cldev, mei_cldev_cb_t rx_cb); int mei_cldev_register_notif_cb(struct mei_cl_device *cldev, mei_cldev_cb_t notif_cb); -const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); u8 mei_cldev_ver(const struct mei_cl_device *cldev); void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index ae4526389261..07584c5e36fb 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -26,11 +26,34 @@ */ #define __sme_set(x) ((x) | sme_me_mask) #define __sme_clr(x) ((x) & ~sme_me_mask) + +#define dma_addr_encrypted(x) __sme_set(x) +#define dma_addr_canonical(x) __sme_clr(x) + #else #define __sme_set(x) (x) #define __sme_clr(x) (x) #endif +/* + * dma_addr_encrypted() and dma_addr_unencrypted() are for converting a given DMA + * address to the respective type of addressing. + * + * dma_addr_canonical() is used to reverse any conversions for encrypted/decrypted + * back to the canonical address. + */ +#ifndef dma_addr_encrypted +#define dma_addr_encrypted(x) (x) +#endif + +#ifndef dma_addr_unencrypted +#define dma_addr_unencrypted(x) (x) +#endif + +#ifndef dma_addr_canonical +#define dma_addr_canonical(x) (x) +#endif + #endif /* __ASSEMBLY__ */ #endif /* __MEM_ENCRYPT_H__ */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 673d5cae7c81..bb19a2534224 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -42,6 +42,14 @@ extern unsigned long long max_possible_pfn; * kernel resource tree. * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, + * either explictitly with memblock_reserve_kern() or via memblock + * allocation APIs. All memblock allocations set this flag. + * @MEMBLOCK_KHO_SCRATCH: memory region that kexec can pass to the next + * kernel in handover mode. During early boot, we do not know about all + * memory reservations yet, so we get scratch memory from the previous + * kernel that we know is good to use. It is the only memory that + * allocations may happen from in this phase. */ enum memblock_flags { MEMBLOCK_NONE = 0x0, /* No special request */ @@ -50,6 +58,8 @@ enum memblock_flags { MEMBLOCK_NOMAP = 0x4, /* don't add to kernel direct mapping */ MEMBLOCK_DRIVER_MANAGED = 0x8, /* always detected via a driver */ MEMBLOCK_RSRV_NOINIT = 0x10, /* don't initialize struct pages */ + MEMBLOCK_RSRV_KERN = 0x20, /* memory reserved for kernel use */ + MEMBLOCK_KHO_SCRATCH = 0x40, /* scratch memory for kexec handover */ }; /** @@ -116,7 +126,19 @@ int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid, int memblock_add(phys_addr_t base, phys_addr_t size); int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_phys_free(phys_addr_t base, phys_addr_t size); -int memblock_reserve(phys_addr_t base, phys_addr_t size); +int __memblock_reserve(phys_addr_t base, phys_addr_t size, int nid, + enum memblock_flags flags); + +static __always_inline int memblock_reserve(phys_addr_t base, phys_addr_t size) +{ + return __memblock_reserve(base, size, NUMA_NO_NODE, 0); +} + +static __always_inline int memblock_reserve_kern(phys_addr_t base, phys_addr_t size) +{ + return __memblock_reserve(base, size, NUMA_NO_NODE, MEMBLOCK_RSRV_KERN); +} + #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP int memblock_physmem_add(phys_addr_t base, phys_addr_t size); #endif @@ -132,8 +154,9 @@ int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); int memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t size); +int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size); +int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size); -void memblock_free_all(void); void memblock_free(void *ptr, size_t size); void reset_all_zones_managed_pages(void); @@ -276,6 +299,11 @@ static inline bool memblock_is_driver_managed(struct memblock_region *m) return m->flags & MEMBLOCK_DRIVER_MANAGED; } +static inline bool memblock_is_kho_scratch(struct memblock_region *m) +{ + return m->flags & MEMBLOCK_KHO_SCRATCH; +} + int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, @@ -378,6 +406,10 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 +/* + * MEMBLOCK_ALLOC_NOLEAKTRACE avoids kmemleak tracing. It implies + * MEMBLOCK_ALLOC_ACCESSIBLE + */ #define MEMBLOCK_ALLOC_NOLEAKTRACE 1 /* We are using top down, so it is safe to use 0 here */ @@ -417,6 +449,12 @@ static __always_inline void *memblock_alloc(phys_addr_t size, phys_addr_t align) MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); } +void *__memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align, + const char *func); + +#define memblock_alloc_or_panic(size, align) \ + __memblock_alloc_or_panic(size, align, __func__) + static inline void *memblock_alloc_raw(phys_addr_t size, phys_addr_t align) { @@ -467,6 +505,7 @@ static inline __init_memblock bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); +phys_addr_t memblock_reserved_kern_size(phys_addr_t limit, int nid); unsigned long memblock_estimated_nr_free_pages(void); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); @@ -593,5 +632,14 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) { } static inline void memtest_report_meminfo(struct seq_file *m) { } #endif +#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH +void memblock_set_kho_scratch_only(void); +void memblock_clear_kho_scratch_only(void); +void memmap_init_kho_scratch_pages(void); +#else +static inline void memblock_set_kho_scratch_only(void) { } +static inline void memblock_clear_kho_scratch_only(void) { } +static inline void memmap_init_kho_scratch_pages(void) {} +#endif #endif /* _LINUX_MEMBLOCK_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5502aa8e138e..87b6688f124a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -113,6 +113,12 @@ struct mem_cgroup_per_node { CACHELINE_PADDING(_pad2_); unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter; + +#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC + /* slab stats for nmi context */ + atomic_t slab_reclaimable; + atomic_t slab_unreclaimable; +#endif }; struct mem_cgroup_threshold { @@ -236,6 +242,10 @@ struct mem_cgroup { atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; +#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC + /* MEMCG_KMEM for nmi context */ + atomic_t kmem_stat; +#endif /* * Hint of reclaim pressure for socket memroy management. Note * that this indicator should NOT be used in legacy cgroup mode @@ -438,9 +448,7 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) */ static inline bool folio_memcg_charged(struct folio *folio) { - if (folio_memcg_kmem(folio)) - return __folio_objcg(folio) != NULL; - return __folio_memcg(folio) != NULL; + return folio->memcg_data != 0; } /* @@ -620,8 +628,6 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, page_counter_read(&memcg->memory); } -void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg); - int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); /** @@ -646,14 +652,11 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } -int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, - long nr_pages); +int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp); int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages); - void __mem_cgroup_uncharge(struct folio *folio); /** @@ -677,7 +680,6 @@ static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) __mem_cgroup_uncharge_folios(folios); } -void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_replace_folio(struct folio *old, struct folio *new); void mem_cgroup_migrate(struct folio *old, struct folio *new); @@ -911,19 +913,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, struct mem_cgroup *oom_domain); void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); -void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, - int val); - /* idx can be of type enum memcg_stat_item or node_stat_item */ -static inline void mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_memcg_state(memcg, idx, val); - local_irq_restore(flags); -} +void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val); static inline void mod_memcg_page_state(struct page *page, enum memcg_stat_item idx, int val) @@ -960,19 +952,8 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, local_irq_restore(flags); } -void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, - unsigned long count); - -static inline void count_memcg_events(struct mem_cgroup *memcg, - enum vm_event_item idx, - unsigned long count) -{ - unsigned long flags; - - local_irq_save(flags); - __count_memcg_events(memcg, idx, count); - local_irq_restore(flags); -} +void count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, + unsigned long count); static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) @@ -1044,8 +1025,28 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -void split_page_memcg(struct page *head, int old_order, int new_order); +void split_page_memcg(struct page *first, unsigned order); +void folio_split_memcg_refs(struct folio *folio, unsigned old_order, + unsigned new_order); + +static inline u64 cgroup_id_from_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + u64 id; + + if (mem_cgroup_disabled()) + return 0; + + rcu_read_lock(); + memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); + if (!memcg) + memcg = root_mem_cgroup; + id = cgroup_id(memcg->css.cgroup); + rcu_read_unlock(); + return id; +} +extern int mem_cgroup_init(void); #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 @@ -1135,21 +1136,15 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, return false; } -static inline void mem_cgroup_commit_charge(struct folio *folio, - struct mem_cgroup *memcg) -{ -} - static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { return 0; } -static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, - gfp_t gfp, long nr_pages) +static inline int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp) { - return 0; + return 0; } static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, @@ -1158,10 +1153,6 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, return 0; } -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr) -{ -} - static inline void mem_cgroup_uncharge(struct folio *folio) { } @@ -1170,11 +1161,6 @@ static inline void mem_cgroup_uncharge_folios(struct folio_batch *folios) { } -static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, - unsigned int nr_pages) -{ -} - static inline void mem_cgroup_replace_folio(struct folio *old, struct folio *new) { @@ -1378,12 +1364,6 @@ static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) { } -static inline void __mod_memcg_state(struct mem_cgroup *memcg, - enum memcg_stat_item idx, - int nr) -{ -} - static inline void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int nr) @@ -1437,12 +1417,6 @@ static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, } static inline void count_memcg_events(struct mem_cgroup *memcg, - enum vm_event_item idx, - unsigned long count) -{ -} - -static inline void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, unsigned long count) { @@ -1463,9 +1437,21 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) { } -static inline void split_page_memcg(struct page *head, int old_order, int new_order) +static inline void split_page_memcg(struct page *first, unsigned order) { } + +static inline void folio_split_memcg_refs(struct folio *folio, + unsigned old_order, unsigned new_order) +{ +} + +static inline u64 cgroup_id_from_mm(struct mm_struct *mm) +{ + return 0; +} + +static inline int mem_cgroup_init(void) { return 0; } #endif /* CONFIG_MEMCG */ /* @@ -1730,6 +1716,8 @@ static inline void count_objcg_events(struct obj_cgroup *objcg, rcu_read_unlock(); } +bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid); + #else static inline bool mem_cgroup_kmem_disabled(void) { @@ -1787,6 +1775,15 @@ static inline void count_objcg_events(struct obj_cgroup *objcg, { } +static inline ino_t page_cgroup_ino(struct page *page) +{ + return 0; +} + +static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid) +{ + return true; +} #endif /* CONFIG_MEMCG */ #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP) @@ -1841,6 +1838,9 @@ static inline void mem_cgroup_exit_user_fault(void) current->in_user_fault = 0; } +void memcg1_swapout(struct folio *folio, swp_entry_t entry); +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages); + #else /* CONFIG_MEMCG_V1 */ static inline unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -1868,6 +1868,14 @@ static inline void mem_cgroup_exit_user_fault(void) { } +static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry) +{ +} + +static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages) +{ +} + #endif /* CONFIG_MEMCG_V1 */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/memfd.h b/include/linux/memfd.h index d437e3070850..246daadbfde8 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -7,7 +7,14 @@ #ifdef CONFIG_MEMFD_CREATE extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg); struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); -unsigned int *memfd_file_seals_ptr(struct file *file); +/* + * Check for any existing seals on mmap, return an error if access is denied due + * to sealing, or 0 otherwise. + * + * We also update VMA flags if appropriate by manipulating the VMA flags pointed + * to by vm_flags_ptr. + */ +int memfd_check_seals_mmap(struct file *file, unsigned long *vm_flags_ptr); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { @@ -17,19 +24,11 @@ static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) { return ERR_PTR(-EINVAL); } - -static inline unsigned int *memfd_file_seals_ptr(struct file *file) +static inline int memfd_check_seals_mmap(struct file *file, + unsigned long *vm_flags_ptr) { - return NULL; + return 0; } #endif -/* Retrieve memfd seals associated with the file, if any. */ -static inline unsigned int memfd_file_seals(struct file *file) -{ - unsigned int *sealsp = memfd_file_seals_ptr(file); - - return sealsp ? *sealsp : 0; -} - #endif /* __LINUX_MEMFD_H */ diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 0dc0cf2863e2..7a805796fcfd 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -18,7 +18,7 @@ * adistance value (slightly faster) than default DRAM adistance to be part of * the same memory tier. */ -#define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) +#define MEMTIER_ADISTANCE_DRAM ((4L * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1)) struct memory_tier; struct memory_dev_type { diff --git a/include/linux/memory.h b/include/linux/memory.h index c0afee5d126e..5ec4e6d209b9 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -25,7 +25,7 @@ /** * struct memory_group - a logical group of memory blocks * @nid: The node id for all memory blocks inside the memory group. - * @blocks: List of all memory blocks belonging to this memory group. + * @memory_blocks: List of all memory blocks belonging to this memory group. * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this * memory group. * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this @@ -149,6 +149,14 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri) { return 0; } +static inline int memory_block_advise_max_size(unsigned long size) +{ + return -ENODEV; +} +static inline unsigned long memory_block_advised_max_size(void) +{ + return 0; +} #else /* CONFIG_MEMORY_HOTPLUG */ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); @@ -181,6 +189,8 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func, void memory_block_add_nid(struct memory_block *mem, int nid, enum meminit_context context); #endif /* CONFIG_NUMA */ +int memory_block_advise_max_size(unsigned long size); +unsigned long memory_block_advised_max_size(void); #endif /* CONFIG_MEMORY_HOTPLUG */ /* diff --git a/include/linux/memory/ti-aemif.h b/include/linux/memory/ti-aemif.h new file mode 100644 index 000000000000..da94a9d985e7 --- /dev/null +++ b/include/linux/memory/ti-aemif.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __MEMORY_TI_AEMIF_H +#define __MEMORY_TI_AEMIF_H + +/** + * struct aemif_cs_timings: structure to hold CS timing configuration + * values are expressed in number of clock cycles - 1 + * @ta: minimum turn around time + * @rhold: read hold width + * @rstrobe: read strobe width + * @rsetup: read setup width + * @whold: write hold width + * @wstrobe: write strobe width + * @wsetup: write setup width + */ +struct aemif_cs_timings { + u32 ta; + u32 rhold; + u32 rstrobe; + u32 rsetup; + u32 whold; + u32 wstrobe; + u32 wsetup; +}; + +struct aemif_device; + +int aemif_set_cs_timings(struct aemif_device *aemif, u8 cs, struct aemif_cs_timings *timings); +int aemif_check_cs_timings(struct aemif_cs_timings *timings); + +#endif // __MEMORY_TI_AEMIF_H diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index b27ddce5d324..eaac5ae8c05c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -144,8 +144,6 @@ extern u64 max_mem_size; extern int mhp_online_type_from_str(const char *str); -/* Default online_type (MMOP_*) when new memory blocks are added. */ -extern int mhp_default_online_type; /* If movable_node boot option specified */ extern bool movable_node_enabled; static inline bool movable_node_is_enabled(void) @@ -303,6 +301,9 @@ static inline void __remove_memory(u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_MEMORY_HOTPLUG +/* Default online_type (MMOP_*) when new memory blocks are added. */ +extern int mhp_get_default_online_type(void); +extern void mhp_set_default_online_type(int online_type); extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat); extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ce9885e0178a..0fe96f3ab3ef 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> +#include <linux/node.h> #include <linux/nodemask.h> #include <linux/pagemap.h> #include <uapi/linux/mempolicy.h> @@ -178,6 +179,9 @@ static inline bool mpol_is_preferred_many(struct mempolicy *pol) extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); +extern int mempolicy_set_node_perf(unsigned int node, + struct access_coordinate *coords); + #else struct mempolicy {}; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 3f7143ade32c..4aa151914eab 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PRIVATE; + page_pgmap(page)->type == MEMORY_DEVICE_PRIVATE; } static inline bool folio_is_device_private(const struct folio *folio) @@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; + page_pgmap(page)->type == MEMORY_DEVICE_PCI_P2PDMA; } static inline bool is_device_coherent_page(const struct page *page) { return is_zone_device_page(page) && - page->pgmap->type == MEMORY_DEVICE_COHERENT; + page_pgmap(page)->type == MEMORY_DEVICE_COHERENT; } static inline bool folio_is_device_coherent(const struct folio *folio) @@ -187,6 +187,17 @@ static inline bool folio_is_device_coherent(const struct folio *folio) return is_device_coherent_page(&folio->page); } +static inline bool is_fsdax_page(const struct page *page) +{ + return is_zone_device_page(page) && + page_pgmap(page)->type == MEMORY_DEVICE_FS_DAX; +} + +static inline bool folio_is_fsdax(const struct folio *folio) +{ + return is_fsdax_page(&folio->page); +} + #ifdef CONFIG_ZONE_DEVICE void zone_device_page_init(struct page *page); void *memremap_pages(struct dev_pagemap *pgmap, int nid); diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h index 2445842d482d..c7a3c53eba68 100644 --- a/include/linux/mfd/aat2870.h +++ b/include/linux/mfd/aat2870.h @@ -133,9 +133,6 @@ struct aat2870_data { int (*read)(struct aat2870_data *aat2870, u8 addr, u8 *val); int (*write)(struct aat2870_data *aat2870, u8 addr, u8 val); int (*update)(struct aat2870_data *aat2870, u8 addr, u8 mask, u8 val); - - /* for debugfs */ - struct dentry *dentry_root; }; struct aat2870_subdev_info { diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index c3df0e615fbf..3c5aecf1d4b5 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -137,6 +137,7 @@ enum axp20x_variants { #define AXP717_IRQ2_STATE 0x4a #define AXP717_IRQ3_STATE 0x4b #define AXP717_IRQ4_STATE 0x4c +#define AXP717_TS_PIN_CFG 0x50 #define AXP717_ICC_CHG_SET 0x62 #define AXP717_ITERM_CHG_SET 0x63 #define AXP717_CV_CHG_SET 0x64 diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h index 6b8791da6119..5a5783abd47b 100644 --- a/include/linux/mfd/bcm590xx.h +++ b/include/linux/mfd/bcm590xx.h @@ -13,6 +13,26 @@ #include <linux/i2c.h> #include <linux/regmap.h> +/* PMU ID register values; also used as device type */ +#define BCM590XX_PMUID_BCM59054 0x54 +#define BCM590XX_PMUID_BCM59056 0x56 + +/* Known chip revision IDs */ +#define BCM59054_REV_DIGITAL_A1 1 +#define BCM59054_REV_ANALOG_A1 2 + +#define BCM59056_REV_DIGITAL_A0 1 +#define BCM59056_REV_ANALOG_A0 1 + +#define BCM59056_REV_DIGITAL_B0 2 +#define BCM59056_REV_ANALOG_B0 2 + +/* regmap types */ +enum bcm590xx_regmap_type { + BCM590XX_REGMAP_PRI, + BCM590XX_REGMAP_SEC, +}; + /* max register address */ #define BCM590XX_MAX_REGISTER_PRI 0xe7 #define BCM590XX_MAX_REGISTER_SEC 0xf0 @@ -23,7 +43,13 @@ struct bcm590xx { struct i2c_client *i2c_sec; struct regmap *regmap_pri; struct regmap *regmap_sec; - unsigned int id; + + /* PMU ID value; also used as device type */ + u8 pmu_id; + + /* Chip revision, read from PMUREV reg */ + u8 rev_digital; + u8 rev_analog; }; #endif /* __LINUX_MFD_BCM590XX_H */ diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index e8bcad641d8c..faeea7abd688 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -72,7 +72,7 @@ struct mfd_cell { int (*resume)(struct platform_device *dev); /* platform data passed to the sub devices drivers */ - void *platform_data; + const void *platform_data; size_t pdata_size; /* Matches ACPI */ diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index 76feb3a7066d..9cb2fc2938ce 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -93,6 +93,8 @@ struct da9052 { int chip_irq; + int fault_log; + /* SOC I/O transfer related fixes for DA9052/53 */ int (*fix_io) (struct da9052 *da9052, unsigned char reg); }; diff --git a/include/linux/mfd/dbx500-prcmu.h b/include/linux/mfd/dbx500-prcmu.h index dd0fc891b228..98567623c9df 100644 --- a/include/linux/mfd/dbx500-prcmu.h +++ b/include/linux/mfd/dbx500-prcmu.h @@ -215,7 +215,7 @@ struct prcmu_fw_version { static inline void prcmu_early_init(void) { - return db8500_prcmu_early_init(); + db8500_prcmu_early_init(); } static inline int prcmu_set_power_state(u8 state, bool keep_ulp_clk, @@ -302,7 +302,7 @@ static inline int prcmu_request_ape_opp_100_voltage(bool enable) static inline void prcmu_system_reset(u16 reset_code) { - return db8500_prcmu_system_reset(reset_code); + db8500_prcmu_system_reset(reset_code); } static inline u16 prcmu_get_reset_code(void) @@ -314,7 +314,7 @@ int prcmu_ac_wake_req(void); void prcmu_ac_sleep_req(void); static inline void prcmu_modem_reset(void) { - return db8500_prcmu_modem_reset(); + db8500_prcmu_modem_reset(); } static inline bool prcmu_is_ac_wake_requested(void) diff --git a/include/linux/mfd/ezx-pcap.h b/include/linux/mfd/ezx-pcap.h index ffde195e12b7..ea51b1cdca5a 100644 --- a/include/linux/mfd/ezx-pcap.h +++ b/include/linux/mfd/ezx-pcap.h @@ -31,7 +31,6 @@ int ezx_pcap_set_bits(struct pcap_chip *, u8, u32, u32); int pcap_to_irq(struct pcap_chip *, int); int irq_to_pcap(struct pcap_chip *, int); int pcap_adc_async(struct pcap_chip *, u8, u32, u8[], void *, void *); -int pcap_adc_sync(struct pcap_chip *, u8, u32, u8[], u16[]); void pcap_set_ts_bits(struct pcap_chip *, u32); #define PCAP_SECOND_PORT 1 diff --git a/include/linux/mfd/lp3943.h b/include/linux/mfd/lp3943.h index 020a339f96e8..402f01078fcc 100644 --- a/include/linux/mfd/lp3943.h +++ b/include/linux/mfd/lp3943.h @@ -11,7 +11,6 @@ #define __MFD_LP3943_H__ #include <linux/gpio.h> -#include <linux/pwm.h> #include <linux/regmap.h> /* Registers */ diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h index a21374f8ad26..dd51a37fa37f 100644 --- a/include/linux/mfd/max14577-private.h +++ b/include/linux/mfd/max14577-private.h @@ -2,7 +2,7 @@ /* * max14577-private.h - Common API for the Maxim 14577/77836 internal sub chip * - * Copyright (C) 2014 Samsung Electrnoics + * Copyright (C) 2014 Samsung Electronics * Chanwoo Choi <cw00.choi@samsung.com> * Krzysztof Kozlowski <krzk@kernel.org> */ diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h index 8b3ef891ba42..0fda5c2e745a 100644 --- a/include/linux/mfd/max14577.h +++ b/include/linux/mfd/max14577.h @@ -2,7 +2,7 @@ /* * max14577.h - Driver for the Maxim 14577/77836 * - * Copyright (C) 2014 Samsung Electrnoics + * Copyright (C) 2014 Samsung Electronics * Chanwoo Choi <cw00.choi@samsung.com> * Krzysztof Kozlowski <krzk@kernel.org> * diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h index ea635d12a741..e6b8b4014dc0 100644 --- a/include/linux/mfd/max77686-private.h +++ b/include/linux/mfd/max77686-private.h @@ -2,7 +2,7 @@ /* * max77686-private.h - Voltage regulator driver for the Maxim 77686/802 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * Chiwoong Byun <woong.byun@samsung.com> */ diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h index d0fb510875e6..7c4624acd1db 100644 --- a/include/linux/mfd/max77686.h +++ b/include/linux/mfd/max77686.h @@ -2,7 +2,7 @@ /* * max77686.h - Driver for the Maxim 77686/802 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * Chiwoong Byun <woong.byun@samsung.com> * * This driver is based on max8997.h diff --git a/include/linux/mfd/max77693-common.h b/include/linux/mfd/max77693-common.h index a5bce099f1ed..ec2e1b2dceb8 100644 --- a/include/linux/mfd/max77693-common.h +++ b/include/linux/mfd/max77693-common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ /* - * Common data shared between Maxim 77693 and 77843 drivers + * Common data shared between Maxim 77693, 77705 and 77843 drivers * * Copyright (C) 2015 Samsung Electronics */ @@ -11,6 +11,7 @@ enum max77693_types { TYPE_MAX77693_UNKNOWN, TYPE_MAX77693, + TYPE_MAX77705, TYPE_MAX77843, TYPE_MAX77693_NUM, @@ -32,6 +33,7 @@ struct max77693_dev { struct regmap *regmap_muic; struct regmap *regmap_haptic; /* Only MAX77693 */ struct regmap *regmap_chg; /* Only MAX77843 */ + struct regmap *regmap_leds; /* Only MAX77705 */ struct regmap_irq_chip_data *irq_data_led; struct regmap_irq_chip_data *irq_data_topsys; diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index c324d548619e..8e7c35b5ea1c 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -2,7 +2,7 @@ /* * max77693-private.h - Voltage regulator driver for the Maxim 77693 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * SangYoung Son <hello.son@samsung.com> * * This program is not provided / owned by Maxim Integrated Products. diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h index c67c16ba8649..8e77ebeb7cf1 100644 --- a/include/linux/mfd/max77693.h +++ b/include/linux/mfd/max77693.h @@ -2,7 +2,7 @@ /* * max77693.h - Driver for the Maxim 77693 * - * Copyright (C) 2012 Samsung Electrnoics + * Copyright (C) 2012 Samsung Electronics * SangYoung Son <hello.son@samsung.com> * * This program is not provided / owned by Maxim Integrated Products. diff --git a/include/linux/mfd/max77705-private.h b/include/linux/mfd/max77705-private.h new file mode 100644 index 000000000000..214de7feeb8c --- /dev/null +++ b/include/linux/mfd/max77705-private.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Maxim MAX77705 definitions. + * + * Copyright (C) 2015 Samsung Electronics, Inc. + * Copyright (C) 2025 Dzmitry Sankouski <dsankouski@gmail.com> + */ + +#ifndef __LINUX_MFD_MAX77705_PRIV_H +#define __LINUX_MFD_MAX77705_PRIV_H + +#define MAX77705_SRC_IRQ_CHG BIT(0) +#define MAX77705_SRC_IRQ_TOP BIT(1) +#define MAX77705_SRC_IRQ_FG BIT(2) +#define MAX77705_SRC_IRQ_USBC BIT(3) +#define MAX77705_SRC_IRQ_ALL (MAX77705_SRC_IRQ_CHG | MAX77705_SRC_IRQ_TOP | \ + MAX77705_SRC_IRQ_FG | MAX77705_SRC_IRQ_USBC) + +/* MAX77705_PMIC_REG_PMICREV register */ +#define MAX77705_VERSION_SHIFT 3 +#define MAX77705_REVISION_MASK GENMASK(2, 0) +#define MAX77705_VERSION_MASK GENMASK(7, MAX77705_VERSION_SHIFT) +/* MAX77705_PMIC_REG_MAINCTRL1 register */ +#define MAX77705_MAINCTRL1_BIASEN_SHIFT 7 +#define MAX77705_MAINCTRL1_BIASEN_MASK BIT(MAX77705_MAINCTRL1_BIASEN_SHIFT) +/* MAX77705_PMIC_REG_MCONFIG2 (haptics) register */ +#define MAX77705_CONFIG2_MEN_SHIFT 6 +#define MAX77705_CONFIG2_MODE_SHIFT 7 +#define MAX77705_CONFIG2_HTYP_SHIFT 5 +/* MAX77705_PMIC_REG_SYSTEM_INT_MASK register */ +#define MAX77705_SYSTEM_IRQ_BSTEN_INT BIT(3) +#define MAX77705_SYSTEM_IRQ_SYSUVLO_INT BIT(4) +#define MAX77705_SYSTEM_IRQ_SYSOVLO_INT BIT(5) +#define MAX77705_SYSTEM_IRQ_TSHDN_INT BIT(6) +#define MAX77705_SYSTEM_IRQ_TM_INT BIT(7) +/* MAX77705_RGBLED_REG_LEDEN register */ +#define MAX77705_RGBLED_EN_WIDTH 2 +/* MAX77705_RGBLED_REG_LEDBLNK register */ +#define MAX77705_RGB_DELAY_100_STEP_LIM 500 +#define MAX77705_RGB_DELAY_100_STEP_COUNT 4 +#define MAX77705_RGB_DELAY_100_STEP 100 +#define MAX77705_RGB_DELAY_250_STEP_LIM 3250 +#define MAX77705_RGB_DELAY_250_STEP 250 +#define MAX77705_RGB_DELAY_500_STEP 500 +#define MAX77705_RGB_DELAY_500_STEP_COUNT 10 +#define MAX77705_RGB_DELAY_500_STEP_LIM 5000 +#define MAX77705_RGB_DELAY_1000_STEP_LIM 8000 +#define MAX77705_RGB_DELAY_1000_STEP_COUNT 13 +#define MAX77705_RGB_DELAY_1000_STEP 1000 +#define MAX77705_RGB_DELAY_2000_STEP 2000 +#define MAX77705_RGB_DELAY_2000_STEP_COUNT 13 +#define MAX77705_RGB_DELAY_2000_STEP_LIM 12000 + +enum max77705_hw_rev { + MAX77705_PASS1 = 1, + MAX77705_PASS2, + MAX77705_PASS3 +}; + +enum max77705_reg { + MAX77705_PMIC_REG_PMICID1 = 0x00, + MAX77705_PMIC_REG_PMICREV = 0x01, + MAX77705_PMIC_REG_MAINCTRL1 = 0x02, + MAX77705_PMIC_REG_BSTOUT_MASK = 0x03, + MAX77705_PMIC_REG_FORCE_EN_MASK = 0x08, + MAX77705_PMIC_REG_MCONFIG = 0x10, + MAX77705_PMIC_REG_MCONFIG2 = 0x11, + MAX77705_PMIC_REG_INTSRC = 0x22, + MAX77705_PMIC_REG_INTSRC_MASK = 0x23, + MAX77705_PMIC_REG_SYSTEM_INT = 0x24, + MAX77705_PMIC_REG_RESERVED_25 = 0x25, + MAX77705_PMIC_REG_SYSTEM_INT_MASK = 0x26, + MAX77705_PMIC_REG_RESERVED_27 = 0x27, + MAX77705_PMIC_REG_RESERVED_28 = 0x28, + MAX77705_PMIC_REG_RESERVED_29 = 0x29, + MAX77705_PMIC_REG_BOOSTCONTROL1 = 0x4C, + MAX77705_PMIC_REG_BOOSTCONTROL2 = 0x4F, + MAX77705_PMIC_REG_SW_RESET = 0x50, + MAX77705_PMIC_REG_USBC_RESET = 0x51, + + MAX77705_PMIC_REG_END +}; + +enum max77705_chg_reg { + MAX77705_CHG_REG_BASE = 0xB0, + MAX77705_CHG_REG_INT = 0, + MAX77705_CHG_REG_INT_MASK, + MAX77705_CHG_REG_INT_OK, + MAX77705_CHG_REG_DETAILS_00, + MAX77705_CHG_REG_DETAILS_01, + MAX77705_CHG_REG_DETAILS_02, + MAX77705_CHG_REG_DTLS_03, + MAX77705_CHG_REG_CNFG_00, + MAX77705_CHG_REG_CNFG_01, + MAX77705_CHG_REG_CNFG_02, + MAX77705_CHG_REG_CNFG_03, + MAX77705_CHG_REG_CNFG_04, + MAX77705_CHG_REG_CNFG_05, + MAX77705_CHG_REG_CNFG_06, + MAX77705_CHG_REG_CNFG_07, + MAX77705_CHG_REG_CNFG_08, + MAX77705_CHG_REG_CNFG_09, + MAX77705_CHG_REG_CNFG_10, + MAX77705_CHG_REG_CNFG_11, + + MAX77705_CHG_REG_CNFG_12, + MAX77705_CHG_REG_CNFG_13, + MAX77705_CHG_REG_CNFG_14, + MAX77705_CHG_REG_SAFEOUT_CTRL +}; + +enum max77705_fuelgauge_reg { + STATUS_REG = 0x00, + VALRT_THRESHOLD_REG = 0x01, + TALRT_THRESHOLD_REG = 0x02, + SALRT_THRESHOLD_REG = 0x03, + REMCAP_REP_REG = 0x05, + SOCREP_REG = 0x06, + TEMPERATURE_REG = 0x08, + VCELL_REG = 0x09, + TIME_TO_EMPTY_REG = 0x11, + FULLSOCTHR_REG = 0x13, + CURRENT_REG = 0x0A, + AVG_CURRENT_REG = 0x0B, + SOCMIX_REG = 0x0D, + SOCAV_REG = 0x0E, + REMCAP_MIX_REG = 0x0F, + FULLCAP_REG = 0x10, + RFAST_REG = 0x15, + AVR_TEMPERATURE_REG = 0x16, + CYCLES_REG = 0x17, + DESIGNCAP_REG = 0x18, + AVR_VCELL_REG = 0x19, + TIME_TO_FULL_REG = 0x20, + CONFIG_REG = 0x1D, + ICHGTERM_REG = 0x1E, + REMCAP_AV_REG = 0x1F, + FULLCAP_NOM_REG = 0x23, + LEARN_CFG_REG = 0x28, + FILTER_CFG_REG = 0x29, + MISCCFG_REG = 0x2B, + QRTABLE20_REG = 0x32, + FULLCAP_REP_REG = 0x35, + RCOMP_REG = 0x38, + VEMPTY_REG = 0x3A, + FSTAT_REG = 0x3D, + DISCHARGE_THRESHOLD_REG = 0x40, + QRTABLE30_REG = 0x42, + ISYS_REG = 0x43, + DQACC_REG = 0x45, + DPACC_REG = 0x46, + AVGISYS_REG = 0x4B, + QH_REG = 0x4D, + VSYS_REG = 0xB1, + TALRTTH2_REG = 0xB2, + VBYP_REG = 0xB3, + CONFIG2_REG = 0xBB, + IIN_REG = 0xD0, + OCV_REG = 0xEE, + VFOCV_REG = 0xFB, + VFSOC_REG = 0xFF, + + MAX77705_FG_END +}; + +enum max77705_led_reg { + MAX77705_RGBLED_REG_BASE = 0x30, + MAX77705_RGBLED_REG_LEDEN = 0, + MAX77705_RGBLED_REG_LED0BRT, + MAX77705_RGBLED_REG_LED1BRT, + MAX77705_RGBLED_REG_LED2BRT, + MAX77705_RGBLED_REG_LED3BRT, + MAX77705_RGBLED_REG_LEDRMP, + MAX77705_RGBLED_REG_LEDBLNK, + MAX77705_LED_REG_END +}; + +enum max77705_charger_battery_state { + MAX77705_BATTERY_NOBAT, + MAX77705_BATTERY_PREQUALIFICATION, + MAX77705_BATTERY_DEAD, + MAX77705_BATTERY_GOOD, + MAX77705_BATTERY_LOWVOLTAGE, + MAX77705_BATTERY_OVERVOLTAGE, + MAX77705_BATTERY_RESERVED +}; + +enum max77705_charger_charge_type { + MAX77705_CHARGER_CONSTANT_CURRENT = 1, + MAX77705_CHARGER_CONSTANT_VOLTAGE, + MAX77705_CHARGER_END_OF_CHARGE, + MAX77705_CHARGER_DONE +}; + +#endif /* __LINUX_MFD_MAX77705_PRIV_H */ diff --git a/include/linux/mfd/max77759.h b/include/linux/mfd/max77759.h new file mode 100644 index 000000000000..c6face34e385 --- /dev/null +++ b/include/linux/mfd/max77759.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2020 Google Inc. + * Copyright 2025 Linaro Ltd. + * + * Maxim MAX77759 core driver + */ + +#ifndef __LINUX_MFD_MAX77759_H +#define __LINUX_MFD_MAX77759_H + +#include <linux/completion.h> +#include <linux/mutex.h> +#include <linux/regmap.h> + +#define MAX77759_PMIC_REG_PMIC_ID 0x00 +#define MAX77759_PMIC_REG_PMIC_REVISION 0x01 +#define MAX77759_PMIC_REG_OTP_REVISION 0x02 +#define MAX77759_PMIC_REG_INTSRC 0x22 +#define MAX77759_PMIC_REG_INTSRCMASK 0x23 +#define MAX77759_PMIC_REG_INTSRC_MAXQ BIT(3) +#define MAX77759_PMIC_REG_INTSRC_TOPSYS BIT(1) +#define MAX77759_PMIC_REG_INTSRC_CHGR BIT(0) +#define MAX77759_PMIC_REG_TOPSYS_INT 0x24 +#define MAX77759_PMIC_REG_TOPSYS_INT_MASK 0x26 +#define MAX77759_PMIC_REG_TOPSYS_INT_TSHDN BIT(6) +#define MAX77759_PMIC_REG_TOPSYS_INT_SYSOVLO BIT(5) +#define MAX77759_PMIC_REG_TOPSYS_INT_SYSUVLO BIT(4) +#define MAX77759_PMIC_REG_TOPSYS_INT_FSHIP BIT(0) +#define MAX77759_PMIC_REG_I2C_CNFG 0x40 +#define MAX77759_PMIC_REG_SWRESET 0x50 +#define MAX77759_PMIC_REG_CONTROL_FG 0x51 + +#define MAX77759_MAXQ_REG_UIC_INT1 0x64 +#define MAX77759_MAXQ_REG_UIC_INT1_APCMDRESI BIT(7) +#define MAX77759_MAXQ_REG_UIC_INT1_SYSMSGI BIT(6) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIO6I BIT(1) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIO5I BIT(0) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, en) (((en) & 1) << (offs)) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI_MASK(offs) \ + MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, ~0) +#define MAX77759_MAXQ_REG_UIC_INT2 0x65 +#define MAX77759_MAXQ_REG_UIC_INT3 0x66 +#define MAX77759_MAXQ_REG_UIC_INT4 0x67 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS1 0x68 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS2 0x69 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS3 0x6a +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS4 0x6b +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS5 0x6c +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS6 0x6d +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS7 0x6f +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS8 0x6f +#define MAX77759_MAXQ_REG_UIC_INT1_M 0x70 +#define MAX77759_MAXQ_REG_UIC_INT2_M 0x71 +#define MAX77759_MAXQ_REG_UIC_INT3_M 0x72 +#define MAX77759_MAXQ_REG_UIC_INT4_M 0x73 +#define MAX77759_MAXQ_REG_AP_DATAOUT0 0x81 +#define MAX77759_MAXQ_REG_AP_DATAOUT32 0xa1 +#define MAX77759_MAXQ_REG_AP_DATAIN0 0xb1 +#define MAX77759_MAXQ_REG_UIC_SWRST 0xe0 + +#define MAX77759_CHGR_REG_CHG_INT 0xb0 +#define MAX77759_CHGR_REG_CHG_INT2 0xb1 +#define MAX77759_CHGR_REG_CHG_INT_MASK 0xb2 +#define MAX77759_CHGR_REG_CHG_INT2_MASK 0xb3 +#define MAX77759_CHGR_REG_CHG_INT_OK 0xb4 +#define MAX77759_CHGR_REG_CHG_DETAILS_00 0xb5 +#define MAX77759_CHGR_REG_CHG_DETAILS_01 0xb6 +#define MAX77759_CHGR_REG_CHG_DETAILS_02 0xb7 +#define MAX77759_CHGR_REG_CHG_DETAILS_03 0xb8 +#define MAX77759_CHGR_REG_CHG_CNFG_00 0xb9 +#define MAX77759_CHGR_REG_CHG_CNFG_01 0xba +#define MAX77759_CHGR_REG_CHG_CNFG_02 0xbb +#define MAX77759_CHGR_REG_CHG_CNFG_03 0xbc +#define MAX77759_CHGR_REG_CHG_CNFG_04 0xbd +#define MAX77759_CHGR_REG_CHG_CNFG_05 0xbe +#define MAX77759_CHGR_REG_CHG_CNFG_06 0xbf +#define MAX77759_CHGR_REG_CHG_CNFG_07 0xc0 +#define MAX77759_CHGR_REG_CHG_CNFG_08 0xc1 +#define MAX77759_CHGR_REG_CHG_CNFG_09 0xc2 +#define MAX77759_CHGR_REG_CHG_CNFG_10 0xc3 +#define MAX77759_CHGR_REG_CHG_CNFG_11 0xc4 +#define MAX77759_CHGR_REG_CHG_CNFG_12 0xc5 +#define MAX77759_CHGR_REG_CHG_CNFG_13 0xc6 +#define MAX77759_CHGR_REG_CHG_CNFG_14 0xc7 +#define MAX77759_CHGR_REG_CHG_CNFG_15 0xc8 +#define MAX77759_CHGR_REG_CHG_CNFG_16 0xc9 +#define MAX77759_CHGR_REG_CHG_CNFG_17 0xca +#define MAX77759_CHGR_REG_CHG_CNFG_18 0xcb +#define MAX77759_CHGR_REG_CHG_CNFG_19 0xcc + +/* MaxQ opcodes for max77759_maxq_command() */ +#define MAX77759_MAXQ_OPCODE_MAXLENGTH (MAX77759_MAXQ_REG_AP_DATAOUT32 - \ + MAX77759_MAXQ_REG_AP_DATAOUT0 + \ + 1) + +#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_READ 0x21 +#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_WRITE 0x22 +#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_READ 0x23 +#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_WRITE 0x24 +#define MAX77759_MAXQ_OPCODE_USER_SPACE_READ 0x81 +#define MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE 0x82 + +/** + * struct max77759 - core max77759 internal data structure + * + * @regmap_top: Regmap for accessing TOP registers + * @maxq_lock: Lock for serializing access to MaxQ + * @regmap_maxq: Regmap for accessing MaxQ registers + * @cmd_done: Used to signal completion of a MaxQ command + * @regmap_charger: Regmap for accessing charger registers + * + * The MAX77759 comprises several sub-blocks, namely TOP, MaxQ, Charger, + * Fuel Gauge, and TCPCI. + */ +struct max77759 { + struct regmap *regmap_top; + + /* This protects MaxQ commands - only one can be active */ + struct mutex maxq_lock; + struct regmap *regmap_maxq; + struct completion cmd_done; + + struct regmap *regmap_charger; +}; + +/** + * struct max77759_maxq_command - structure containing the MaxQ command to + * send + * + * @length: The number of bytes to send. + * @cmd: The data to send. + */ +struct max77759_maxq_command { + u8 length; + u8 cmd[] __counted_by(length); +}; + +/** + * struct max77759_maxq_response - structure containing the MaxQ response + * + * @length: The number of bytes to receive. + * @rsp: The data received. Must have at least @length bytes space. + */ +struct max77759_maxq_response { + u8 length; + u8 rsp[] __counted_by(length); +}; + +/** + * max77759_maxq_command() - issue a MaxQ command and wait for the response + * and associated data + * + * @max77759: The core max77759 device handle. + * @cmd: The command to be sent. + * @rsp: Any response data associated with the command will be copied here; + * can be %NULL if the command has no response (other than ACK). + * + * Return: 0 on success, a negative error number otherwise. + */ +int max77759_maxq_command(struct max77759 *max77759, + const struct max77759_maxq_command *cmd, + struct max77759_maxq_response *rsp); + +#endif /* __LINUX_MFD_MAX77759_H */ diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h index a10cd6945232..261c0aae7d00 100644 --- a/include/linux/mfd/max8997-private.h +++ b/include/linux/mfd/max8997-private.h @@ -2,7 +2,7 @@ /* * max8997-private.h - Voltage regulator driver for the Maxim 8997 * - * Copyright (C) 2010 Samsung Electrnoics + * Copyright (C) 2010 Samsung Electronics * MyungJoo Ham <myungjoo.ham@samsung.com> */ @@ -397,7 +397,6 @@ enum max8997_types { }; extern int max8997_irq_init(struct max8997_dev *max8997); -extern void max8997_irq_exit(struct max8997_dev *max8997); extern int max8997_irq_resume(struct max8997_dev *max8997); extern int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest); diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index 5c2cc1103437..fb36e1386069 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -2,7 +2,7 @@ /* * max8997.h - Driver for the Maxim 8997/8966 * - * Copyright (C) 2009-2010 Samsung Electrnoics + * Copyright (C) 2009-2010 Samsung Electronics * MyungJoo Ham <myungjoo.ham@samsung.com> * * This driver is based on max8998.h diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h index 6deb5f577602..d77dc18db6eb 100644 --- a/include/linux/mfd/max8998-private.h +++ b/include/linux/mfd/max8998-private.h @@ -2,7 +2,7 @@ /* * max8998-private.h - Voltage regulator driver for the Maxim 8998 * - * Copyright (C) 2009-2010 Samsung Electrnoics + * Copyright (C) 2009-2010 Samsung Electronics * Kyungmin Park <kyungmin.park@samsung.com> * Marek Szyprowski <m.szyprowski@samsung.com> */ diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h index a054e55c8646..5473f1983e31 100644 --- a/include/linux/mfd/max8998.h +++ b/include/linux/mfd/max8998.h @@ -2,7 +2,7 @@ /* * max8998.h - Voltage regulator driver for the Maxim 8998 * - * Copyright (C) 2009-2010 Samsung Electrnoics + * Copyright (C) 2009-2010 Samsung Electronics * Kyungmin Park <kyungmin.park@samsung.com> * Marek Szyprowski <m.szyprowski@samsung.com> */ diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h index 068ae1c0f0e8..27883af44f87 100644 --- a/include/linux/mfd/mt6397/rtc.h +++ b/include/linux/mfd/mt6397/rtc.h @@ -60,11 +60,6 @@ #define RTC_PDN2 0x002e #define RTC_PDN2_PWRON_ALARM BIT(4) -#define RTC_MIN_YEAR 1968 -#define RTC_BASE_YEAR 1900 -#define RTC_NUM_YEARS 128 -#define RTC_MIN_YEAR_OFFSET (RTC_MIN_YEAR - RTC_BASE_YEAR) - #define MTK_RTC_POLL_DELAY_US 10 #define MTK_RTC_POLL_TIMEOUT (jiffies_to_usecs(HZ)) diff --git a/include/linux/mfd/pcf50633/adc.h b/include/linux/mfd/pcf50633/adc.h deleted file mode 100644 index 6a81896d4889..000000000000 --- a/include/linux/mfd/pcf50633/adc.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * adc.h -- Driver for NXP PCF50633 ADC - * - * (C) 2006-2008 by Openmoko, Inc. - * All rights reserved. - */ - -#ifndef __LINUX_MFD_PCF50633_ADC_H -#define __LINUX_MFD_PCF50633_ADC_H - -#include <linux/mfd/pcf50633/core.h> -#include <linux/platform_device.h> - -/* ADC Registers */ -#define PCF50633_REG_ADCC3 0x52 -#define PCF50633_REG_ADCC2 0x53 -#define PCF50633_REG_ADCC1 0x54 -#define PCF50633_REG_ADCS1 0x55 -#define PCF50633_REG_ADCS2 0x56 -#define PCF50633_REG_ADCS3 0x57 - -#define PCF50633_ADCC1_ADCSTART 0x01 -#define PCF50633_ADCC1_RES_8BIT 0x02 -#define PCF50633_ADCC1_RES_10BIT 0x00 -#define PCF50633_ADCC1_AVERAGE_NO 0x00 -#define PCF50633_ADCC1_AVERAGE_4 0x04 -#define PCF50633_ADCC1_AVERAGE_8 0x08 -#define PCF50633_ADCC1_AVERAGE_16 0x0c -#define PCF50633_ADCC1_MUX_BATSNS_RES 0x00 -#define PCF50633_ADCC1_MUX_BATSNS_SUBTR 0x10 -#define PCF50633_ADCC1_MUX_ADCIN2_RES 0x20 -#define PCF50633_ADCC1_MUX_ADCIN2_SUBTR 0x30 -#define PCF50633_ADCC1_MUX_BATTEMP 0x60 -#define PCF50633_ADCC1_MUX_ADCIN1 0x70 -#define PCF50633_ADCC1_AVERAGE_MASK 0x0c -#define PCF50633_ADCC1_ADCMUX_MASK 0xf0 - -#define PCF50633_ADCC2_RATIO_NONE 0x00 -#define PCF50633_ADCC2_RATIO_BATTEMP 0x01 -#define PCF50633_ADCC2_RATIO_ADCIN1 0x02 -#define PCF50633_ADCC2_RATIO_BOTH 0x03 -#define PCF50633_ADCC2_RATIOSETTL_100US 0x04 - -#define PCF50633_ADCC3_ACCSW_EN 0x01 -#define PCF50633_ADCC3_NTCSW_EN 0x04 -#define PCF50633_ADCC3_RES_DIV_TWO 0x10 -#define PCF50633_ADCC3_RES_DIV_THREE 0x00 - -#define PCF50633_ADCS3_REF_NTCSW 0x00 -#define PCF50633_ADCS3_REF_ACCSW 0x10 -#define PCF50633_ADCS3_REF_2V0 0x20 -#define PCF50633_ADCS3_REF_VISA 0x30 -#define PCF50633_ADCS3_REF_2V0_2 0x70 -#define PCF50633_ADCS3_ADCRDY 0x80 - -#define PCF50633_ADCS3_ADCDAT1L_MASK 0x03 -#define PCF50633_ADCS3_ADCDAT2L_MASK 0x0c -#define PCF50633_ADCS3_ADCDAT2L_SHIFT 2 -#define PCF50633_ASCS3_REF_MASK 0x70 - -extern int -pcf50633_adc_async_read(struct pcf50633 *pcf, int mux, int avg, - void (*callback)(struct pcf50633 *, void *, int), - void *callback_param); -extern int -pcf50633_adc_sync_read(struct pcf50633 *pcf, int mux, int avg); - -#endif /* __LINUX_PCF50633_ADC_H */ diff --git a/include/linux/mfd/pcf50633/backlight.h b/include/linux/mfd/pcf50633/backlight.h deleted file mode 100644 index fd4a4f8d6c13..000000000000 --- a/include/linux/mfd/pcf50633/backlight.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de> - * PCF50633 backlight device driver - */ - -#ifndef __LINUX_MFD_PCF50633_BACKLIGHT -#define __LINUX_MFD_PCF50633_BACKLIGHT - -/* -* @default_brightness: Backlight brightness is initialized to this value -* -* Brightness to be used after the driver has been probed. -* Valid range 0-63. -* -* @default_brightness_limit: The actual brightness is limited by this value -* -* Brightness limit to be used after the driver has been probed. This is useful -* when it is not known how much power is available for the backlight during -* probe. -* Valid range 0-63. Can be changed later with pcf50633_bl_set_brightness_limit. -* -* @ramp_time: Display ramp time when changing brightness -* -* When changing the backlights brightness the change is not instant, instead -* it fades smooth from one state to another. This value specifies how long -* the fade should take. The lower the value the higher the fade time. -* Valid range 0-255 -*/ -struct pcf50633_bl_platform_data { - unsigned int default_brightness; - unsigned int default_brightness_limit; - uint8_t ramp_time; -}; - - -struct pcf50633; - -int pcf50633_bl_set_brightness_limit(struct pcf50633 *pcf, unsigned int limit); - -#endif - diff --git a/include/linux/mfd/pcf50633/core.h b/include/linux/mfd/pcf50633/core.h index 539f27f8bd89..42d2b0e4884e 100644 --- a/include/linux/mfd/pcf50633/core.h +++ b/include/linux/mfd/pcf50633/core.h @@ -15,7 +15,6 @@ #include <linux/regulator/machine.h> #include <linux/pm.h> #include <linux/power_supply.h> -#include <linux/mfd/pcf50633/backlight.h> struct pcf50633; struct regmap; @@ -42,8 +41,6 @@ struct pcf50633_platform_data { void (*force_shutdown)(struct pcf50633 *); u8 resumers[5]; - - struct pcf50633_bl_platform_data *backlight_data; }; struct pcf50633_irq { diff --git a/include/linux/mfd/pcf50633/gpio.h b/include/linux/mfd/pcf50633/gpio.h deleted file mode 100644 index f589e35795f1..000000000000 --- a/include/linux/mfd/pcf50633/gpio.h +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * gpio.h -- GPIO driver for NXP PCF50633 - * - * (C) 2006-2008 by Openmoko, Inc. - * All rights reserved. - */ - -#ifndef __LINUX_MFD_PCF50633_GPIO_H -#define __LINUX_MFD_PCF50633_GPIO_H - -#include <linux/mfd/pcf50633/core.h> - -#define PCF50633_GPIO1 1 -#define PCF50633_GPIO2 2 -#define PCF50633_GPIO3 3 -#define PCF50633_GPO 4 - -#define PCF50633_REG_GPIO1CFG 0x14 -#define PCF50633_REG_GPIO2CFG 0x15 -#define PCF50633_REG_GPIO3CFG 0x16 -#define PCF50633_REG_GPOCFG 0x17 - -#define PCF50633_GPOCFG_GPOSEL_MASK 0x07 - -enum pcf50633_reg_gpocfg { - PCF50633_GPOCFG_GPOSEL_0 = 0x00, - PCF50633_GPOCFG_GPOSEL_LED_NFET = 0x01, - PCF50633_GPOCFG_GPOSEL_SYSxOK = 0x02, - PCF50633_GPOCFG_GPOSEL_CLK32K = 0x03, - PCF50633_GPOCFG_GPOSEL_ADAPUSB = 0x04, - PCF50633_GPOCFG_GPOSEL_USBxOK = 0x05, - PCF50633_GPOCFG_GPOSEL_ACTPH4 = 0x06, - PCF50633_GPOCFG_GPOSEL_1 = 0x07, - PCF50633_GPOCFG_GPOSEL_INVERSE = 0x08, -}; - -int pcf50633_gpio_set(struct pcf50633 *pcf, int gpio, u8 val); -u8 pcf50633_gpio_get(struct pcf50633 *pcf, int gpio); - -int pcf50633_gpio_invert_set(struct pcf50633 *, int gpio, int invert); -int pcf50633_gpio_invert_get(struct pcf50633 *pcf, int gpio); - -int pcf50633_gpio_power_supply_set(struct pcf50633 *, - int gpio, int regulator, int on); -#endif /* __LINUX_MFD_PCF50633_GPIO_H */ - - diff --git a/include/linux/mfd/pcf50633/mbc.h b/include/linux/mfd/pcf50633/mbc.h deleted file mode 100644 index fa5cb9256d99..000000000000 --- a/include/linux/mfd/pcf50633/mbc.h +++ /dev/null @@ -1,130 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * mbc.h -- Driver for NXP PCF50633 Main Battery Charger - * - * (C) 2006-2008 by Openmoko, Inc. - * All rights reserved. - */ - -#ifndef __LINUX_MFD_PCF50633_MBC_H -#define __LINUX_MFD_PCF50633_MBC_H - -#include <linux/mfd/pcf50633/core.h> -#include <linux/platform_device.h> - -#define PCF50633_REG_MBCC1 0x43 -#define PCF50633_REG_MBCC2 0x44 -#define PCF50633_REG_MBCC3 0x45 -#define PCF50633_REG_MBCC4 0x46 -#define PCF50633_REG_MBCC5 0x47 -#define PCF50633_REG_MBCC6 0x48 -#define PCF50633_REG_MBCC7 0x49 -#define PCF50633_REG_MBCC8 0x4a -#define PCF50633_REG_MBCS1 0x4b -#define PCF50633_REG_MBCS2 0x4c -#define PCF50633_REG_MBCS3 0x4d - -enum pcf50633_reg_mbcc1 { - PCF50633_MBCC1_CHGENA = 0x01, /* Charger enable */ - PCF50633_MBCC1_AUTOSTOP = 0x02, - PCF50633_MBCC1_AUTORES = 0x04, /* automatic resume */ - PCF50633_MBCC1_RESUME = 0x08, /* explicit resume cmd */ - PCF50633_MBCC1_RESTART = 0x10, /* restart charging */ - PCF50633_MBCC1_PREWDTIME_60M = 0x20, /* max. precharging time */ - PCF50633_MBCC1_WDTIME_1H = 0x00, - PCF50633_MBCC1_WDTIME_2H = 0x40, - PCF50633_MBCC1_WDTIME_4H = 0x80, - PCF50633_MBCC1_WDTIME_6H = 0xc0, -}; -#define PCF50633_MBCC1_WDTIME_MASK 0xc0 - -enum pcf50633_reg_mbcc2 { - PCF50633_MBCC2_VBATCOND_2V7 = 0x00, - PCF50633_MBCC2_VBATCOND_2V85 = 0x01, - PCF50633_MBCC2_VBATCOND_3V0 = 0x02, - PCF50633_MBCC2_VBATCOND_3V15 = 0x03, - PCF50633_MBCC2_VMAX_4V = 0x00, - PCF50633_MBCC2_VMAX_4V20 = 0x28, - PCF50633_MBCC2_VRESDEBTIME_64S = 0x80, /* debounce time (32/64sec) */ -}; - -enum pcf50633_reg_mbcc7 { - PCF50633_MBCC7_USB_100mA = 0x00, - PCF50633_MBCC7_USB_500mA = 0x01, - PCF50633_MBCC7_USB_1000mA = 0x02, - PCF50633_MBCC7_USB_SUSPEND = 0x03, - PCF50633_MBCC7_BATTEMP_EN = 0x04, - PCF50633_MBCC7_BATSYSIMAX_1A6 = 0x00, - PCF50633_MBCC7_BATSYSIMAX_1A8 = 0x40, - PCF50633_MBCC7_BATSYSIMAX_2A0 = 0x80, - PCF50633_MBCC7_BATSYSIMAX_2A2 = 0xc0, -}; -#define PCF50633_MBCC7_USB_MASK 0x03 - -enum pcf50633_reg_mbcc8 { - PCF50633_MBCC8_USBENASUS = 0x10, -}; - -enum pcf50633_reg_mbcs1 { - PCF50633_MBCS1_USBPRES = 0x01, - PCF50633_MBCS1_USBOK = 0x02, - PCF50633_MBCS1_ADAPTPRES = 0x04, - PCF50633_MBCS1_ADAPTOK = 0x08, - PCF50633_MBCS1_TBAT_OK = 0x00, - PCF50633_MBCS1_TBAT_ABOVE = 0x10, - PCF50633_MBCS1_TBAT_BELOW = 0x20, - PCF50633_MBCS1_TBAT_UNDEF = 0x30, - PCF50633_MBCS1_PREWDTEXP = 0x40, - PCF50633_MBCS1_WDTEXP = 0x80, -}; - -enum pcf50633_reg_mbcs2_mbcmod { - PCF50633_MBCS2_MBC_PLAY = 0x00, - PCF50633_MBCS2_MBC_USB_PRE = 0x01, - PCF50633_MBCS2_MBC_USB_PRE_WAIT = 0x02, - PCF50633_MBCS2_MBC_USB_FAST = 0x03, - PCF50633_MBCS2_MBC_USB_FAST_WAIT = 0x04, - PCF50633_MBCS2_MBC_USB_SUSPEND = 0x05, - PCF50633_MBCS2_MBC_ADP_PRE = 0x06, - PCF50633_MBCS2_MBC_ADP_PRE_WAIT = 0x07, - PCF50633_MBCS2_MBC_ADP_FAST = 0x08, - PCF50633_MBCS2_MBC_ADP_FAST_WAIT = 0x09, - PCF50633_MBCS2_MBC_BAT_FULL = 0x0a, - PCF50633_MBCS2_MBC_HALT = 0x0b, -}; -#define PCF50633_MBCS2_MBC_MASK 0x0f -enum pcf50633_reg_mbcs2_chgstat { - PCF50633_MBCS2_CHGS_NONE = 0x00, - PCF50633_MBCS2_CHGS_ADAPTER = 0x10, - PCF50633_MBCS2_CHGS_USB = 0x20, - PCF50633_MBCS2_CHGS_BOTH = 0x30, -}; -#define PCF50633_MBCS2_RESSTAT_AUTO 0x40 - -enum pcf50633_reg_mbcs3 { - PCF50633_MBCS3_USBLIM_PLAY = 0x01, - PCF50633_MBCS3_USBLIM_CGH = 0x02, - PCF50633_MBCS3_TLIM_PLAY = 0x04, - PCF50633_MBCS3_TLIM_CHG = 0x08, - PCF50633_MBCS3_ILIM = 0x10, /* 1: Ibat > Icutoff */ - PCF50633_MBCS3_VLIM = 0x20, /* 1: Vbat == Vmax */ - PCF50633_MBCS3_VBATSTAT = 0x40, /* 1: Vbat > Vbatcond */ - PCF50633_MBCS3_VRES = 0x80, /* 1: Vbat > Vth(RES) */ -}; - -#define PCF50633_MBCC2_VBATCOND_MASK 0x03 -#define PCF50633_MBCC2_VMAX_MASK 0x3c - -/* Charger status */ -#define PCF50633_MBC_USB_ONLINE 0x01 -#define PCF50633_MBC_USB_ACTIVE 0x02 -#define PCF50633_MBC_ADAPTER_ONLINE 0x04 -#define PCF50633_MBC_ADAPTER_ACTIVE 0x08 - -int pcf50633_mbc_usb_curlim_set(struct pcf50633 *pcf, int ma); - -int pcf50633_mbc_get_status(struct pcf50633 *); -int pcf50633_mbc_get_usb_online_status(struct pcf50633 *); - -#endif - diff --git a/include/linux/mfd/pcf50633/pmic.h b/include/linux/mfd/pcf50633/pmic.h deleted file mode 100644 index eac0c3d8e984..000000000000 --- a/include/linux/mfd/pcf50633/pmic.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_MFD_PCF50633_PMIC_H -#define __LINUX_MFD_PCF50633_PMIC_H - -#include <linux/mfd/pcf50633/core.h> -#include <linux/platform_device.h> - -#define PCF50633_REG_AUTOOUT 0x1a -#define PCF50633_REG_AUTOENA 0x1b -#define PCF50633_REG_AUTOCTL 0x1c -#define PCF50633_REG_AUTOMXC 0x1d -#define PCF50633_REG_DOWN1OUT 0x1e -#define PCF50633_REG_DOWN1ENA 0x1f -#define PCF50633_REG_DOWN1CTL 0x20 -#define PCF50633_REG_DOWN1MXC 0x21 -#define PCF50633_REG_DOWN2OUT 0x22 -#define PCF50633_REG_DOWN2ENA 0x23 -#define PCF50633_REG_DOWN2CTL 0x24 -#define PCF50633_REG_DOWN2MXC 0x25 -#define PCF50633_REG_MEMLDOOUT 0x26 -#define PCF50633_REG_MEMLDOENA 0x27 -#define PCF50633_REG_LDO1OUT 0x2d -#define PCF50633_REG_LDO1ENA 0x2e -#define PCF50633_REG_LDO2OUT 0x2f -#define PCF50633_REG_LDO2ENA 0x30 -#define PCF50633_REG_LDO3OUT 0x31 -#define PCF50633_REG_LDO3ENA 0x32 -#define PCF50633_REG_LDO4OUT 0x33 -#define PCF50633_REG_LDO4ENA 0x34 -#define PCF50633_REG_LDO5OUT 0x35 -#define PCF50633_REG_LDO5ENA 0x36 -#define PCF50633_REG_LDO6OUT 0x37 -#define PCF50633_REG_LDO6ENA 0x38 -#define PCF50633_REG_HCLDOOUT 0x39 -#define PCF50633_REG_HCLDOENA 0x3a -#define PCF50633_REG_HCLDOOVL 0x40 - -enum pcf50633_regulator_enable { - PCF50633_REGULATOR_ON = 0x01, - PCF50633_REGULATOR_ON_GPIO1 = 0x02, - PCF50633_REGULATOR_ON_GPIO2 = 0x04, - PCF50633_REGULATOR_ON_GPIO3 = 0x08, -}; -#define PCF50633_REGULATOR_ON_MASK 0x0f - -enum pcf50633_regulator_phase { - PCF50633_REGULATOR_ACTPH1 = 0x00, - PCF50633_REGULATOR_ACTPH2 = 0x10, - PCF50633_REGULATOR_ACTPH3 = 0x20, - PCF50633_REGULATOR_ACTPH4 = 0x30, -}; -#define PCF50633_REGULATOR_ACTPH_MASK 0x30 - -enum pcf50633_regulator_id { - PCF50633_REGULATOR_AUTO, - PCF50633_REGULATOR_DOWN1, - PCF50633_REGULATOR_DOWN2, - PCF50633_REGULATOR_LDO1, - PCF50633_REGULATOR_LDO2, - PCF50633_REGULATOR_LDO3, - PCF50633_REGULATOR_LDO4, - PCF50633_REGULATOR_LDO5, - PCF50633_REGULATOR_LDO6, - PCF50633_REGULATOR_HCLDO, - PCF50633_REGULATOR_MEMLDO, -}; -#endif - diff --git a/include/linux/mfd/qnap-mcu.h b/include/linux/mfd/qnap-mcu.h new file mode 100644 index 000000000000..8d48c212fd44 --- /dev/null +++ b/include/linux/mfd/qnap-mcu.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Core definitions for QNAP MCU MFD driver. + * Copyright (C) 2024 Heiko Stuebner <heiko@sntech.de> + */ + +#ifndef _LINUX_QNAP_MCU_H_ +#define _LINUX_QNAP_MCU_H_ + +struct qnap_mcu; + +struct qnap_mcu_variant { + u32 baud_rate; + int num_drives; + int fan_pwm_min; + int fan_pwm_max; + bool usb_led; +}; + +int qnap_mcu_exec(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size, + u8 *reply_data, size_t reply_data_size); +int qnap_mcu_exec_with_ack(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size); + +#endif /* _LINUX_QNAP_MCU_H_ */ diff --git a/include/linux/mfd/rohm-bd96801.h b/include/linux/mfd/rohm-bd96801.h index e2d9e10b6364..68c8ac8ad409 100644 --- a/include/linux/mfd/rohm-bd96801.h +++ b/include/linux/mfd/rohm-bd96801.h @@ -40,7 +40,9 @@ * INTB status registers are at range 0x5c ... 0x63 */ #define BD96801_REG_INT_SYS_ERRB1 0x52 +#define BD96801_REG_INT_BUCK2_ERRB 0x56 #define BD96801_REG_INT_SYS_INTB 0x5c +#define BD96801_REG_INT_BUCK2_INTB 0x5e #define BD96801_REG_INT_LDO7_INTB 0x63 /* MASK registers */ diff --git a/include/linux/mfd/rohm-bd96802.h b/include/linux/mfd/rohm-bd96802.h new file mode 100644 index 000000000000..bf4b77944edf --- /dev/null +++ b/include/linux/mfd/rohm-bd96802.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2025 ROHM Semiconductors + * + * The digital interface of trhe BD96802 PMIC is a reduced version of the + * BD96801. Hence the BD96801 definitions are used for registers and masks + * while this header only holds the IRQ definitions - mainly to avoid gaps in + * IRQ numbers caused by the lack of some BUCKs / LDOs and their respective + * IRQs. + */ + +#ifndef __LINUX_MFD_BD96802_H__ +#define __LINUX_MFD_BD96802_H__ + +/* ERRB IRQs */ +enum { + /* Reg 0x52, 0x53, 0x54 - ERRB system IRQs */ + BD96802_OTP_ERR_STAT, + BD96802_DBIST_ERR_STAT, + BD96802_EEP_ERR_STAT, + BD96802_ABIST_ERR_STAT, + BD96802_PRSTB_ERR_STAT, + BD96802_DRMOS1_ERR_STAT, + BD96802_DRMOS2_ERR_STAT, + BD96802_SLAVE_ERR_STAT, + BD96802_VREF_ERR_STAT, + BD96802_TSD_ERR_STAT, + BD96802_UVLO_ERR_STAT, + BD96802_OVLO_ERR_STAT, + BD96802_OSC_ERR_STAT, + BD96802_PON_ERR_STAT, + BD96802_POFF_ERR_STAT, + BD96802_CMD_SHDN_ERR_STAT, + BD96802_INT_SHDN_ERR_STAT, + + /* Reg 0x55 BUCK1 ERR IRQs */ + BD96802_BUCK1_PVIN_ERR_STAT, + BD96802_BUCK1_OVP_ERR_STAT, + BD96802_BUCK1_UVP_ERR_STAT, + BD96802_BUCK1_SHDN_ERR_STAT, + + /* Reg 0x56 BUCK2 ERR IRQs */ + BD96802_BUCK2_PVIN_ERR_STAT, + BD96802_BUCK2_OVP_ERR_STAT, + BD96802_BUCK2_UVP_ERR_STAT, + BD96802_BUCK2_SHDN_ERR_STAT, +}; + +/* INTB IRQs */ +enum { + /* Reg 0x5c (System INTB) */ + BD96802_TW_STAT, + BD96802_WDT_ERR_STAT, + BD96802_I2C_ERR_STAT, + BD96802_CHIP_IF_ERR_STAT, + + /* Reg 0x5d (BUCK1 INTB) */ + BD96802_BUCK1_OCPH_STAT, + BD96802_BUCK1_OCPL_STAT, + BD96802_BUCK1_OCPN_STAT, + BD96802_BUCK1_OVD_STAT, + BD96802_BUCK1_UVD_STAT, + BD96802_BUCK1_TW_CH_STAT, + + /* Reg 0x5e (BUCK2 INTB) */ + BD96802_BUCK2_OCPH_STAT, + BD96802_BUCK2_OCPL_STAT, + BD96802_BUCK2_OCPN_STAT, + BD96802_BUCK2_OVD_STAT, + BD96802_BUCK2_UVD_STAT, + BD96802_BUCK2_TW_CH_STAT, +}; + +#endif diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index e7d4e6afe388..579e8dcfcca4 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -17,6 +17,9 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD71837, ROHM_CHIP_TYPE_BD71847, ROHM_CHIP_TYPE_BD96801, + ROHM_CHIP_TYPE_BD96802, + ROHM_CHIP_TYPE_BD96805, + ROHM_CHIP_TYPE_BD96806, ROHM_CHIP_TYPE_AMOUNT }; diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 750274d41fc0..d785e101fe79 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -39,11 +39,13 @@ enum sec_device_type { S5M8767X, S2DOS05, S2MPA01, + S2MPG10, S2MPS11X, S2MPS13X, S2MPS14X, S2MPS15X, S2MPU02, + S2MPU05, }; /** @@ -65,15 +67,11 @@ struct sec_pmic_dev { struct regmap *regmap_pmic; struct i2c_client *i2c; - unsigned long device_type; + int device_type; int irq; struct regmap_irq_chip_data *irq_data; }; -int sec_irq_init(struct sec_pmic_dev *sec_pmic); -void sec_irq_exit(struct sec_pmic_dev *sec_pmic); -int sec_irq_resume(struct sec_pmic_dev *sec_pmic); - struct sec_platform_data { struct sec_regulator_data *regulators; struct sec_opmode_data *opmode; diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h index 3fd2775eb9bb..b4805cbd949b 100644 --- a/include/linux/mfd/samsung/irq.h +++ b/include/linux/mfd/samsung/irq.h @@ -57,6 +57,109 @@ enum s2mpa01_irq { #define S2MPA01_IRQ_B24_TSD_MASK (1 << 4) #define S2MPA01_IRQ_B35_TSD_MASK (1 << 5) +enum s2mpg10_irq { + /* PMIC */ + S2MPG10_IRQ_PWRONF, + S2MPG10_IRQ_PWRONR, + S2MPG10_IRQ_JIGONBF, + S2MPG10_IRQ_JIGONBR, + S2MPG10_IRQ_ACOKBF, + S2MPG10_IRQ_ACOKBR, + S2MPG10_IRQ_PWRON1S, + S2MPG10_IRQ_MRB, +#define S2MPG10_IRQ_PWRONF_MASK BIT(0) +#define S2MPG10_IRQ_PWRONR_MASK BIT(1) +#define S2MPG10_IRQ_JIGONBF_MASK BIT(2) +#define S2MPG10_IRQ_JIGONBR_MASK BIT(3) +#define S2MPG10_IRQ_ACOKBF_MASK BIT(4) +#define S2MPG10_IRQ_ACOKBR_MASK BIT(5) +#define S2MPG10_IRQ_PWRON1S_MASK BIT(6) +#define S2MPG10_IRQ_MRB_MASK BIT(7) + + S2MPG10_IRQ_RTC60S, + S2MPG10_IRQ_RTCA1, + S2MPG10_IRQ_RTCA0, + S2MPG10_IRQ_RTC1S, + S2MPG10_IRQ_WTSR_COLDRST, + S2MPG10_IRQ_WTSR, + S2MPG10_IRQ_WRST, + S2MPG10_IRQ_SMPL, +#define S2MPG10_IRQ_RTC60S_MASK BIT(0) +#define S2MPG10_IRQ_RTCA1_MASK BIT(1) +#define S2MPG10_IRQ_RTCA0_MASK BIT(2) +#define S2MPG10_IRQ_RTC1S_MASK BIT(3) +#define S2MPG10_IRQ_WTSR_COLDRST_MASK BIT(4) +#define S2MPG10_IRQ_WTSR_MASK BIT(5) +#define S2MPG10_IRQ_WRST_MASK BIT(6) +#define S2MPG10_IRQ_SMPL_MASK BIT(7) + + S2MPG10_IRQ_120C, + S2MPG10_IRQ_140C, + S2MPG10_IRQ_TSD, + S2MPG10_IRQ_PIF_TIMEOUT1, + S2MPG10_IRQ_PIF_TIMEOUT2, + S2MPG10_IRQ_SPD_PARITY_ERR, + S2MPG10_IRQ_SPD_ABNORMAL_STOP, + S2MPG10_IRQ_PMETER_OVERF, +#define S2MPG10_IRQ_INT120C_MASK BIT(0) +#define S2MPG10_IRQ_INT140C_MASK BIT(1) +#define S2MPG10_IRQ_TSD_MASK BIT(2) +#define S2MPG10_IRQ_PIF_TIMEOUT1_MASK BIT(3) +#define S2MPG10_IRQ_PIF_TIMEOUT2_MASK BIT(4) +#define S2MPG10_IRQ_SPD_PARITY_ERR_MASK BIT(5) +#define S2MPG10_IRQ_SPD_ABNORMAL_STOP_MASK BIT(6) +#define S2MPG10_IRQ_PMETER_OVERF_MASK BIT(7) + + S2MPG10_IRQ_OCP_B1M, + S2MPG10_IRQ_OCP_B2M, + S2MPG10_IRQ_OCP_B3M, + S2MPG10_IRQ_OCP_B4M, + S2MPG10_IRQ_OCP_B5M, + S2MPG10_IRQ_OCP_B6M, + S2MPG10_IRQ_OCP_B7M, + S2MPG10_IRQ_OCP_B8M, +#define S2MPG10_IRQ_OCP_B1M_MASK BIT(0) +#define S2MPG10_IRQ_OCP_B2M_MASK BIT(1) +#define S2MPG10_IRQ_OCP_B3M_MASK BIT(2) +#define S2MPG10_IRQ_OCP_B4M_MASK BIT(3) +#define S2MPG10_IRQ_OCP_B5M_MASK BIT(4) +#define S2MPG10_IRQ_OCP_B6M_MASK BIT(5) +#define S2MPG10_IRQ_OCP_B7M_MASK BIT(6) +#define S2MPG10_IRQ_OCP_B8M_MASK BIT(7) + + S2MPG10_IRQ_OCP_B9M, + S2MPG10_IRQ_OCP_B10M, + S2MPG10_IRQ_WLWP_ACC, + S2MPG10_IRQ_SMPL_TIMEOUT, + S2MPG10_IRQ_WTSR_TIMEOUT, + S2MPG10_IRQ_SPD_SRP_PKT_RST, +#define S2MPG10_IRQ_OCP_B9M_MASK BIT(0) +#define S2MPG10_IRQ_OCP_B10M_MASK BIT(1) +#define S2MPG10_IRQ_WLWP_ACC_MASK BIT(2) +#define S2MPG10_IRQ_SMPL_TIMEOUT_MASK BIT(5) +#define S2MPG10_IRQ_WTSR_TIMEOUT_MASK BIT(6) +#define S2MPG10_IRQ_SPD_SRP_PKT_RST_MASK BIT(7) + + S2MPG10_IRQ_PWR_WARN_CH0, + S2MPG10_IRQ_PWR_WARN_CH1, + S2MPG10_IRQ_PWR_WARN_CH2, + S2MPG10_IRQ_PWR_WARN_CH3, + S2MPG10_IRQ_PWR_WARN_CH4, + S2MPG10_IRQ_PWR_WARN_CH5, + S2MPG10_IRQ_PWR_WARN_CH6, + S2MPG10_IRQ_PWR_WARN_CH7, +#define S2MPG10_IRQ_PWR_WARN_CH0_MASK BIT(0) +#define S2MPG10_IRQ_PWR_WARN_CH1_MASK BIT(1) +#define S2MPG10_IRQ_PWR_WARN_CH2_MASK BIT(2) +#define S2MPG10_IRQ_PWR_WARN_CH3_MASK BIT(3) +#define S2MPG10_IRQ_PWR_WARN_CH4_MASK BIT(4) +#define S2MPG10_IRQ_PWR_WARN_CH5_MASK BIT(5) +#define S2MPG10_IRQ_PWR_WARN_CH6_MASK BIT(6) +#define S2MPG10_IRQ_PWR_WARN_CH7_MASK BIT(7) + + S2MPG10_IRQ_NR, +}; + enum s2mps11_irq { S2MPS11_IRQ_PWRONF, S2MPS11_IRQ_PWRONR, @@ -150,6 +253,50 @@ enum s2mpu02_irq { /* Masks for interrupts are the same as in s2mps11 */ #define S2MPS14_IRQ_TSD_MASK (1 << 2) +enum s2mpu05_irq { + S2MPU05_IRQ_PWRONF, + S2MPU05_IRQ_PWRONR, + S2MPU05_IRQ_JIGONBF, + S2MPU05_IRQ_JIGONBR, + S2MPU05_IRQ_ACOKF, + S2MPU05_IRQ_ACOKR, + S2MPU05_IRQ_PWRON1S, + S2MPU05_IRQ_MRB, + + S2MPU05_IRQ_RTC60S, + S2MPU05_IRQ_RTCA1, + S2MPU05_IRQ_RTCA0, + S2MPU05_IRQ_SMPL, + S2MPU05_IRQ_RTC1S, + S2MPU05_IRQ_WTSR, + + S2MPU05_IRQ_INT120C, + S2MPU05_IRQ_INT140C, + S2MPU05_IRQ_TSD, + + S2MPU05_IRQ_NR, +}; + +#define S2MPU05_IRQ_PWRONF_MASK BIT(0) +#define S2MPU05_IRQ_PWRONR_MASK BIT(1) +#define S2MPU05_IRQ_JIGONBF_MASK BIT(2) +#define S2MPU05_IRQ_JIGONBR_MASK BIT(3) +#define S2MPU05_IRQ_ACOKF_MASK BIT(4) +#define S2MPU05_IRQ_ACOKR_MASK BIT(5) +#define S2MPU05_IRQ_PWRON1S_MASK BIT(6) +#define S2MPU05_IRQ_MRB_MASK BIT(7) + +#define S2MPU05_IRQ_RTC60S_MASK BIT(0) +#define S2MPU05_IRQ_RTCA1_MASK BIT(1) +#define S2MPU05_IRQ_RTCA0_MASK BIT(2) +#define S2MPU05_IRQ_SMPL_MASK BIT(3) +#define S2MPU05_IRQ_RTC1S_MASK BIT(4) +#define S2MPU05_IRQ_WTSR_MASK BIT(5) + +#define S2MPU05_IRQ_INT120C_MASK BIT(0) +#define S2MPU05_IRQ_INT140C_MASK BIT(1) +#define S2MPU05_IRQ_TSD_MASK BIT(2) + enum s5m8767_irq { S5M8767_IRQ_PWRR, S5M8767_IRQ_PWRF, diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h index 0204decfc9aa..51c4239a1fa6 100644 --- a/include/linux/mfd/samsung/rtc.h +++ b/include/linux/mfd/samsung/rtc.h @@ -72,6 +72,37 @@ enum s2mps_rtc_reg { S2MPS_RTC_REG_MAX, }; +enum s2mpg10_rtc_reg { + S2MPG10_RTC_CTRL, + S2MPG10_RTC_UPDATE, + S2MPG10_RTC_SMPL, + S2MPG10_RTC_WTSR, + S2MPG10_RTC_CAP_SEL, + S2MPG10_RTC_MSEC, + S2MPG10_RTC_SEC, + S2MPG10_RTC_MIN, + S2MPG10_RTC_HOUR, + S2MPG10_RTC_WEEK, + S2MPG10_RTC_DAY, + S2MPG10_RTC_MON, + S2MPG10_RTC_YEAR, + S2MPG10_RTC_A0SEC, + S2MPG10_RTC_A0MIN, + S2MPG10_RTC_A0HOUR, + S2MPG10_RTC_A0WEEK, + S2MPG10_RTC_A0DAY, + S2MPG10_RTC_A0MON, + S2MPG10_RTC_A0YEAR, + S2MPG10_RTC_A1SEC, + S2MPG10_RTC_A1MIN, + S2MPG10_RTC_A1HOUR, + S2MPG10_RTC_A1WEEK, + S2MPG10_RTC_A1DAY, + S2MPG10_RTC_A1MON, + S2MPG10_RTC_A1YEAR, + S2MPG10_RTC_OSC_CTRL, +}; + #define RTC_I2C_ADDR (0x0C >> 1) #define HOUR_12 (1 << 7) @@ -124,10 +155,16 @@ enum s2mps_rtc_reg { #define ALARM_ENABLE_SHIFT 7 #define ALARM_ENABLE_MASK (1 << ALARM_ENABLE_SHIFT) +/* WTSR & SMPL registers */ #define SMPL_ENABLE_SHIFT 7 #define SMPL_ENABLE_MASK (1 << SMPL_ENABLE_SHIFT) #define WTSR_ENABLE_SHIFT 6 #define WTSR_ENABLE_MASK (1 << WTSR_ENABLE_SHIFT) +#define S2MPG10_WTSR_COLDTIMER GENMASK(6, 5) +#define S2MPG10_WTSR_COLDRST BIT(4) +#define S2MPG10_WTSR_WTSRT GENMASK(3, 1) +#define S2MPG10_WTSR_WTSR_EN BIT(0) + #endif /* __LINUX_MFD_SEC_RTC_H */ diff --git a/include/linux/mfd/samsung/s2mpg10.h b/include/linux/mfd/samsung/s2mpg10.h new file mode 100644 index 000000000000..9f5919b89a3c --- /dev/null +++ b/include/linux/mfd/samsung/s2mpg10.h @@ -0,0 +1,454 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2015 Samsung Electronics + * Copyright 2020 Google Inc + * Copyright 2025 Linaro Ltd. + */ + +#ifndef __LINUX_MFD_S2MPG10_H +#define __LINUX_MFD_S2MPG10_H + +/* Common registers (type 0x000) */ +enum s2mpg10_common_reg { + S2MPG10_COMMON_CHIPID, + S2MPG10_COMMON_INT, + S2MPG10_COMMON_INT_MASK, + S2MPG10_COMMON_SPD_CTRL1 = 0x0a, + S2MPG10_COMMON_SPD_CTRL2, + S2MPG10_COMMON_SPD_CTRL3, + S2MPG10_COMMON_MON1SEL = 0x1a, + S2MPG10_COMMON_MON2SEL, + S2MPG10_COMMON_MONR, + S2MPG10_COMMON_DEBUG_CTRL1, + S2MPG10_COMMON_DEBUG_CTRL2, + S2MPG10_COMMON_DEBUG_CTRL3, + S2MPG10_COMMON_DEBUG_CTRL4, + S2MPG10_COMMON_DEBUG_CTRL5, + S2MPG10_COMMON_DEBUG_CTRL6, + S2MPG10_COMMON_DEBUG_CTRL7, + S2MPG10_COMMON_DEBUG_CTRL8, + S2MPG10_COMMON_TEST_MODE1, + S2MPG10_COMMON_TEST_MODE2, + S2MPG10_COMMON_SPD_DEBUG1, + S2MPG10_COMMON_SPD_DEBUG2, + S2MPG10_COMMON_SPD_DEBUG3, + S2MPG10_COMMON_SPD_DEBUG4, +}; + +/* For S2MPG10_COMMON_INT and S2MPG10_COMMON_INT_MASK */ +#define S2MPG10_COMMON_INT_SRC GENMASK(7, 0) +#define S2MPG10_COMMON_INT_SRC_PMIC BIT(0) + +/* PMIC registers (type 0x100) */ +enum s2mpg10_pmic_reg { + S2MPG10_PMIC_INT1, + S2MPG10_PMIC_INT2, + S2MPG10_PMIC_INT3, + S2MPG10_PMIC_INT4, + S2MPG10_PMIC_INT5, + S2MPG10_PMIC_INT6, + S2MPG10_PMIC_INT1M, + S2MPG10_PMIC_INT2M, + S2MPG10_PMIC_INT3M, + S2MPG10_PMIC_INT4M, + S2MPG10_PMIC_INT5M, + S2MPG10_PMIC_INT6M, + S2MPG10_PMIC_STATUS1, + S2MPG10_PMIC_STATUS2, + S2MPG10_PMIC_PWRONSRC, + S2MPG10_PMIC_OFFSRC, + S2MPG10_PMIC_BU_CHG, + S2MPG10_PMIC_RTCBUF, + S2MPG10_PMIC_COMMON_CTRL1, + S2MPG10_PMIC_COMMON_CTRL2, + S2MPG10_PMIC_COMMON_CTRL3, + S2MPG10_PMIC_COMMON_CTRL4, + S2MPG10_PMIC_SMPL_WARN_CTRL, + S2MPG10_PMIC_MIMICKING_CTRL, + S2MPG10_PMIC_B1M_CTRL, + S2MPG10_PMIC_B1M_OUT1, + S2MPG10_PMIC_B1M_OUT2, + S2MPG10_PMIC_B2M_CTRL, + S2MPG10_PMIC_B2M_OUT1, + S2MPG10_PMIC_B2M_OUT2, + S2MPG10_PMIC_B3M_CTRL, + S2MPG10_PMIC_B3M_OUT1, + S2MPG10_PMIC_B3M_OUT2, + S2MPG10_PMIC_B4M_CTRL, + S2MPG10_PMIC_B4M_OUT1, + S2MPG10_PMIC_B4M_OUT2, + S2MPG10_PMIC_B5M_CTRL, + S2MPG10_PMIC_B5M_OUT1, + S2MPG10_PMIC_B5M_OUT2, + S2MPG10_PMIC_B6M_CTRL, + S2MPG10_PMIC_B6M_OUT1, + S2MPG10_PMIC_B6M_OUT2, + S2MPG10_PMIC_B7M_CTRL, + S2MPG10_PMIC_B7M_OUT1, + S2MPG10_PMIC_B7M_OUT2, + S2MPG10_PMIC_B8M_CTRL, + S2MPG10_PMIC_B8M_OUT1, + S2MPG10_PMIC_B8M_OUT2, + S2MPG10_PMIC_B9M_CTRL, + S2MPG10_PMIC_B9M_OUT1, + S2MPG10_PMIC_B9M_OUT2, + S2MPG10_PMIC_B10M_CTRL, + S2MPG10_PMIC_B10M_OUT1, + S2MPG10_PMIC_B10M_OUT2, + S2MPG10_PMIC_BUCK1M_USONIC, + S2MPG10_PMIC_BUCK2M_USONIC, + S2MPG10_PMIC_BUCK3M_USONIC, + S2MPG10_PMIC_BUCK4M_USONIC, + S2MPG10_PMIC_BUCK5M_USONIC, + S2MPG10_PMIC_BUCK6M_USONIC, + S2MPG10_PMIC_BUCK7M_USONIC, + S2MPG10_PMIC_BUCK8M_USONIC, + S2MPG10_PMIC_BUCK9M_USONIC, + S2MPG10_PMIC_BUCK10M_USONIC, + S2MPG10_PMIC_L1M_CTRL, + S2MPG10_PMIC_L2M_CTRL, + S2MPG10_PMIC_L3M_CTRL, + S2MPG10_PMIC_L4M_CTRL, + S2MPG10_PMIC_L5M_CTRL, + S2MPG10_PMIC_L6M_CTRL, + S2MPG10_PMIC_L7M_CTRL, + S2MPG10_PMIC_L8M_CTRL, + S2MPG10_PMIC_L9M_CTRL, + S2MPG10_PMIC_L10M_CTRL, + S2MPG10_PMIC_L11M_CTRL1, + S2MPG10_PMIC_L11M_CTRL2, + S2MPG10_PMIC_L12M_CTRL1, + S2MPG10_PMIC_L12M_CTRL2, + S2MPG10_PMIC_L13M_CTRL1, + S2MPG10_PMIC_L13M_CTRL2, + S2MPG10_PMIC_L14M_CTRL, + S2MPG10_PMIC_L15M_CTRL1, + S2MPG10_PMIC_L15M_CTRL2, + S2MPG10_PMIC_L16M_CTRL, + S2MPG10_PMIC_L17M_CTRL, + S2MPG10_PMIC_L18M_CTRL, + S2MPG10_PMIC_L19M_CTRL, + S2MPG10_PMIC_L20M_CTRL, + S2MPG10_PMIC_L21M_CTRL, + S2MPG10_PMIC_L22M_CTRL, + S2MPG10_PMIC_L23M_CTRL, + S2MPG10_PMIC_L24M_CTRL, + S2MPG10_PMIC_L25M_CTRL, + S2MPG10_PMIC_L26M_CTRL, + S2MPG10_PMIC_L27M_CTRL, + S2MPG10_PMIC_L28M_CTRL, + S2MPG10_PMIC_L29M_CTRL, + S2MPG10_PMIC_L30M_CTRL, + S2MPG10_PMIC_L31M_CTRL, + S2MPG10_PMIC_LDO_CTRL1, + S2MPG10_PMIC_LDO_CTRL2, + S2MPG10_PMIC_LDO_DSCH1, + S2MPG10_PMIC_LDO_DSCH2, + S2MPG10_PMIC_LDO_DSCH3, + S2MPG10_PMIC_LDO_DSCH4, + S2MPG10_PMIC_LDO_BUCK7M_HLIMIT, + S2MPG10_PMIC_LDO_BUCK7M_LLIMIT, + S2MPG10_PMIC_LDO_LDO21M_HLIMIT, + S2MPG10_PMIC_LDO_LDO21M_LLIMIT, + S2MPG10_PMIC_LDO_LDO11M_HLIMIT, + S2MPG10_PMIC_DVS_RAMP1, + S2MPG10_PMIC_DVS_RAMP2, + S2MPG10_PMIC_DVS_RAMP3, + S2MPG10_PMIC_DVS_RAMP4, + S2MPG10_PMIC_DVS_RAMP5, + S2MPG10_PMIC_DVS_RAMP6, + S2MPG10_PMIC_DVS_SYNC_CTRL1, + S2MPG10_PMIC_DVS_SYNC_CTRL2, + S2MPG10_PMIC_DVS_SYNC_CTRL3, + S2MPG10_PMIC_DVS_SYNC_CTRL4, + S2MPG10_PMIC_DVS_SYNC_CTRL5, + S2MPG10_PMIC_DVS_SYNC_CTRL6, + S2MPG10_PMIC_OFF_CTRL1, + S2MPG10_PMIC_OFF_CTRL2, + S2MPG10_PMIC_OFF_CTRL3, + S2MPG10_PMIC_OFF_CTRL4, + S2MPG10_PMIC_SEQ_CTRL1, + S2MPG10_PMIC_SEQ_CTRL2, + S2MPG10_PMIC_SEQ_CTRL3, + S2MPG10_PMIC_SEQ_CTRL4, + S2MPG10_PMIC_SEQ_CTRL5, + S2MPG10_PMIC_SEQ_CTRL6, + S2MPG10_PMIC_SEQ_CTRL7, + S2MPG10_PMIC_SEQ_CTRL8, + S2MPG10_PMIC_SEQ_CTRL9, + S2MPG10_PMIC_SEQ_CTRL10, + S2MPG10_PMIC_SEQ_CTRL11, + S2MPG10_PMIC_SEQ_CTRL12, + S2MPG10_PMIC_SEQ_CTRL13, + S2MPG10_PMIC_SEQ_CTRL14, + S2MPG10_PMIC_SEQ_CTRL15, + S2MPG10_PMIC_SEQ_CTRL16, + S2MPG10_PMIC_SEQ_CTRL17, + S2MPG10_PMIC_SEQ_CTRL18, + S2MPG10_PMIC_SEQ_CTRL19, + S2MPG10_PMIC_SEQ_CTRL20, + S2MPG10_PMIC_SEQ_CTRL21, + S2MPG10_PMIC_SEQ_CTRL22, + S2MPG10_PMIC_SEQ_CTRL23, + S2MPG10_PMIC_SEQ_CTRL24, + S2MPG10_PMIC_SEQ_CTRL25, + S2MPG10_PMIC_SEQ_CTRL26, + S2MPG10_PMIC_SEQ_CTRL27, + S2MPG10_PMIC_SEQ_CTRL28, + S2MPG10_PMIC_SEQ_CTRL29, + S2MPG10_PMIC_SEQ_CTRL30, + S2MPG10_PMIC_SEQ_CTRL31, + S2MPG10_PMIC_SEQ_CTRL32, + S2MPG10_PMIC_SEQ_CTRL33, + S2MPG10_PMIC_SEQ_CTRL34, + S2MPG10_PMIC_SEQ_CTRL35, + S2MPG10_PMIC_OFF_SEQ_CTRL1, + S2MPG10_PMIC_OFF_SEQ_CTRL2, + S2MPG10_PMIC_OFF_SEQ_CTRL3, + S2MPG10_PMIC_OFF_SEQ_CTRL4, + S2MPG10_PMIC_OFF_SEQ_CTRL5, + S2MPG10_PMIC_OFF_SEQ_CTRL6, + S2MPG10_PMIC_OFF_SEQ_CTRL7, + S2MPG10_PMIC_OFF_SEQ_CTRL8, + S2MPG10_PMIC_OFF_SEQ_CTRL9, + S2MPG10_PMIC_OFF_SEQ_CTRL10, + S2MPG10_PMIC_OFF_SEQ_CTRL11, + S2MPG10_PMIC_OFF_SEQ_CTRL12, + S2MPG10_PMIC_OFF_SEQ_CTRL13, + S2MPG10_PMIC_OFF_SEQ_CTRL14, + S2MPG10_PMIC_OFF_SEQ_CTRL15, + S2MPG10_PMIC_OFF_SEQ_CTRL16, + S2MPG10_PMIC_OFF_SEQ_CTRL17, + S2MPG10_PMIC_OFF_SEQ_CTRL18, + S2MPG10_PMIC_PCTRLSEL1, + S2MPG10_PMIC_PCTRLSEL2, + S2MPG10_PMIC_PCTRLSEL3, + S2MPG10_PMIC_PCTRLSEL4, + S2MPG10_PMIC_PCTRLSEL5, + S2MPG10_PMIC_PCTRLSEL6, + S2MPG10_PMIC_PCTRLSEL7, + S2MPG10_PMIC_PCTRLSEL8, + S2MPG10_PMIC_PCTRLSEL9, + S2MPG10_PMIC_PCTRLSEL10, + S2MPG10_PMIC_PCTRLSEL11, + S2MPG10_PMIC_PCTRLSEL12, + S2MPG10_PMIC_PCTRLSEL13, + S2MPG10_PMIC_DCTRLSEL1, + S2MPG10_PMIC_DCTRLSEL2, + S2MPG10_PMIC_DCTRLSEL3, + S2MPG10_PMIC_DCTRLSEL4, + S2MPG10_PMIC_DCTRLSEL5, + S2MPG10_PMIC_DCTRLSEL6, + S2MPG10_PMIC_DCTRLSEL7, + S2MPG10_PMIC_GPIO_CTRL1, + S2MPG10_PMIC_GPIO_CTRL2, + S2MPG10_PMIC_GPIO_CTRL3, + S2MPG10_PMIC_GPIO_CTRL4, + S2MPG10_PMIC_GPIO_CTRL5, + S2MPG10_PMIC_GPIO_CTRL6, + S2MPG10_PMIC_GPIO_CTRL7, + S2MPG10_PMIC_B2M_OCP_WARN, + S2MPG10_PMIC_B2M_OCP_WARN_X, + S2MPG10_PMIC_B2M_OCP_WARN_Y, + S2MPG10_PMIC_B2M_OCP_WARN_Z, + S2MPG10_PMIC_B3M_OCP_WARN, + S2MPG10_PMIC_B3M_OCP_WARN_X, + S2MPG10_PMIC_B3M_OCP_WARN_Y, + S2MPG10_PMIC_B3M_OCP_WARN_Z, + S2MPG10_PMIC_B10M_OCP_WARN, + S2MPG10_PMIC_B10M_OCP_WARN_X, + S2MPG10_PMIC_B10M_OCP_WARN_Y, + S2MPG10_PMIC_B10M_OCP_WARN_Z, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN_X, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN_Y, + S2MPG10_PMIC_B2M_SOFT_OCP_WARN_Z, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN_X, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN_Y, + S2MPG10_PMIC_B3M_SOFT_OCP_WARN_Z, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN_X, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN_Y, + S2MPG10_PMIC_B10M_SOFT_OCP_WARN_Z, + S2MPG10_PMIC_BUCK_OCP_EN1, + S2MPG10_PMIC_BUCK_OCP_EN2, + S2MPG10_PMIC_BUCK_OCP_PD_EN1, + S2MPG10_PMIC_BUCK_OCP_PD_EN2, + S2MPG10_PMIC_BUCK_OCP_CTRL1, + S2MPG10_PMIC_BUCK_OCP_CTRL2, + S2MPG10_PMIC_BUCK_OCP_CTRL3, + S2MPG10_PMIC_BUCK_OCP_CTRL4, + S2MPG10_PMIC_BUCK_OCP_CTRL5, + S2MPG10_PMIC_PIF_CTRL, + S2MPG10_PMIC_BUCK_HR_MODE1, + S2MPG10_PMIC_BUCK_HR_MODE2, + S2MPG10_PMIC_FAULTOUT_CTRL, + S2MPG10_PMIC_LDO_SENSE1, + S2MPG10_PMIC_LDO_SENSE2, + S2MPG10_PMIC_LDO_SENSE3, + S2MPG10_PMIC_LDO_SENSE4, +}; + +/* Meter registers (type 0xa00) */ +enum s2mpg10_meter_reg { + S2MPG10_METER_CTRL1, + S2MPG10_METER_CTRL2, + S2MPG10_METER_CTRL3, + S2MPG10_METER_CTRL4, + S2MPG10_METER_BUCKEN1, + S2MPG10_METER_BUCKEN2, + S2MPG10_METER_MUXSEL0, + S2MPG10_METER_MUXSEL1, + S2MPG10_METER_MUXSEL2, + S2MPG10_METER_MUXSEL3, + S2MPG10_METER_MUXSEL4, + S2MPG10_METER_MUXSEL5, + S2MPG10_METER_MUXSEL6, + S2MPG10_METER_MUXSEL7, + S2MPG10_METER_LPF_C0_0, + S2MPG10_METER_LPF_C0_1, + S2MPG10_METER_LPF_C0_2, + S2MPG10_METER_LPF_C0_3, + S2MPG10_METER_LPF_C0_4, + S2MPG10_METER_LPF_C0_5, + S2MPG10_METER_LPF_C0_6, + S2MPG10_METER_LPF_C0_7, + S2MPG10_METER_PWR_WARN0, + S2MPG10_METER_PWR_WARN1, + S2MPG10_METER_PWR_WARN2, + S2MPG10_METER_PWR_WARN3, + S2MPG10_METER_PWR_WARN4, + S2MPG10_METER_PWR_WARN5, + S2MPG10_METER_PWR_WARN6, + S2MPG10_METER_PWR_WARN7, + S2MPG10_METER_PWR_HYS1, + S2MPG10_METER_PWR_HYS2, + S2MPG10_METER_PWR_HYS3, + S2MPG10_METER_PWR_HYS4, + S2MPG10_METER_ACC_DATA_CH0_1 = 0x40, + S2MPG10_METER_ACC_DATA_CH0_2, + S2MPG10_METER_ACC_DATA_CH0_3, + S2MPG10_METER_ACC_DATA_CH0_4, + S2MPG10_METER_ACC_DATA_CH0_5, + S2MPG10_METER_ACC_DATA_CH0_6, + S2MPG10_METER_ACC_DATA_CH1_1, + S2MPG10_METER_ACC_DATA_CH1_2, + S2MPG10_METER_ACC_DATA_CH1_3, + S2MPG10_METER_ACC_DATA_CH1_4, + S2MPG10_METER_ACC_DATA_CH1_5, + S2MPG10_METER_ACC_DATA_CH1_6, + S2MPG10_METER_ACC_DATA_CH2_1, + S2MPG10_METER_ACC_DATA_CH2_2, + S2MPG10_METER_ACC_DATA_CH2_3, + S2MPG10_METER_ACC_DATA_CH2_4, + S2MPG10_METER_ACC_DATA_CH2_5, + S2MPG10_METER_ACC_DATA_CH2_6, + S2MPG10_METER_ACC_DATA_CH3_1, + S2MPG10_METER_ACC_DATA_CH3_2, + S2MPG10_METER_ACC_DATA_CH3_3, + S2MPG10_METER_ACC_DATA_CH3_4, + S2MPG10_METER_ACC_DATA_CH3_5, + S2MPG10_METER_ACC_DATA_CH3_6, + S2MPG10_METER_ACC_DATA_CH4_1, + S2MPG10_METER_ACC_DATA_CH4_2, + S2MPG10_METER_ACC_DATA_CH4_3, + S2MPG10_METER_ACC_DATA_CH4_4, + S2MPG10_METER_ACC_DATA_CH4_5, + S2MPG10_METER_ACC_DATA_CH4_6, + S2MPG10_METER_ACC_DATA_CH5_1, + S2MPG10_METER_ACC_DATA_CH5_2, + S2MPG10_METER_ACC_DATA_CH5_3, + S2MPG10_METER_ACC_DATA_CH5_4, + S2MPG10_METER_ACC_DATA_CH5_5, + S2MPG10_METER_ACC_DATA_CH5_6, + S2MPG10_METER_ACC_DATA_CH6_1, + S2MPG10_METER_ACC_DATA_CH6_2, + S2MPG10_METER_ACC_DATA_CH6_3, + S2MPG10_METER_ACC_DATA_CH6_4, + S2MPG10_METER_ACC_DATA_CH6_5, + S2MPG10_METER_ACC_DATA_CH6_6, + S2MPG10_METER_ACC_DATA_CH7_1, + S2MPG10_METER_ACC_DATA_CH7_2, + S2MPG10_METER_ACC_DATA_CH7_3, + S2MPG10_METER_ACC_DATA_CH7_4, + S2MPG10_METER_ACC_DATA_CH7_5, + S2MPG10_METER_ACC_DATA_CH7_6, + S2MPG10_METER_ACC_COUNT_1, + S2MPG10_METER_ACC_COUNT_2, + S2MPG10_METER_ACC_COUNT_3, + S2MPG10_METER_LPF_DATA_CH0_1, + S2MPG10_METER_LPF_DATA_CH0_2, + S2MPG10_METER_LPF_DATA_CH0_3, + S2MPG10_METER_LPF_DATA_CH1_1, + S2MPG10_METER_LPF_DATA_CH1_2, + S2MPG10_METER_LPF_DATA_CH1_3, + S2MPG10_METER_LPF_DATA_CH2_1, + S2MPG10_METER_LPF_DATA_CH2_2, + S2MPG10_METER_LPF_DATA_CH2_3, + S2MPG10_METER_LPF_DATA_CH3_1, + S2MPG10_METER_LPF_DATA_CH3_2, + S2MPG10_METER_LPF_DATA_CH3_3, + S2MPG10_METER_LPF_DATA_CH4_1, + S2MPG10_METER_LPF_DATA_CH4_2, + S2MPG10_METER_LPF_DATA_CH4_3, + S2MPG10_METER_LPF_DATA_CH5_1, + S2MPG10_METER_LPF_DATA_CH5_2, + S2MPG10_METER_LPF_DATA_CH5_3, + S2MPG10_METER_LPF_DATA_CH6_1, + S2MPG10_METER_LPF_DATA_CH6_2, + S2MPG10_METER_LPF_DATA_CH6_3, + S2MPG10_METER_LPF_DATA_CH7_1, + S2MPG10_METER_LPF_DATA_CH7_2, + S2MPG10_METER_LPF_DATA_CH7_3, + S2MPG10_METER_DSM_TRIM_OFFSET = 0xee, + S2MPG10_METER_BUCK_METER_TRIM3 = 0xf1, +}; + +/* S2MPG10 regulator IDs */ +enum s2mpg10_regulators { + S2MPG10_LDO1, + S2MPG10_LDO2, + S2MPG10_LDO3, + S2MPG10_LDO4, + S2MPG10_LDO5, + S2MPG10_LDO6, + S2MPG10_LDO7, + S2MPG10_LDO8, + S2MPG10_LDO9, + S2MPG10_LDO10, + S2MPG10_LDO11, + S2MPG10_LDO12, + S2MPG10_LDO13, + S2MPG10_LDO14, + S2MPG10_LDO15, + S2MPG10_LDO16, + S2MPG10_LDO17, + S2MPG10_LDO18, + S2MPG10_LDO19, + S2MPG10_LDO20, + S2MPG10_LDO21, + S2MPG10_LDO22, + S2MPG10_LDO23, + S2MPG10_LDO24, + S2MPG10_LDO25, + S2MPG10_LDO26, + S2MPG10_LDO27, + S2MPG10_LDO28, + S2MPG10_LDO29, + S2MPG10_LDO30, + S2MPG10_LDO31, + S2MPG10_BUCK1, + S2MPG10_BUCK2, + S2MPG10_BUCK3, + S2MPG10_BUCK4, + S2MPG10_BUCK5, + S2MPG10_BUCK6, + S2MPG10_BUCK7, + S2MPG10_BUCK8, + S2MPG10_BUCK9, + S2MPG10_BUCK10, + S2MPG10_REGULATOR_MAX, +}; + +#endif /* __LINUX_MFD_S2MPG10_H */ diff --git a/include/linux/mfd/samsung/s2mpu05.h b/include/linux/mfd/samsung/s2mpu05.h new file mode 100644 index 000000000000..fcdb6c8adb03 --- /dev/null +++ b/include/linux/mfd/samsung/s2mpu05.h @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright (c) 2015 Samsung Electronics Co., Ltd + * Copyright (c) 2025 Kaustabh Chakraborty <kauschluss@disroot.org> + */ + +#ifndef __LINUX_MFD_S2MPU05_H +#define __LINUX_MFD_S2MPU05_H + +/* S2MPU05 registers */ +enum S2MPU05_reg { + S2MPU05_REG_ID, + S2MPU05_REG_INT1, + S2MPU05_REG_INT2, + S2MPU05_REG_INT3, + S2MPU05_REG_INT1M, + S2MPU05_REG_INT2M, + S2MPU05_REG_INT3M, + S2MPU05_REG_ST1, + S2MPU05_REG_ST2, + S2MPU05_REG_PWRONSRC, + S2MPU05_REG_OFFSRC, + S2MPU05_REG_BU_CHG, + S2MPU05_REG_RTC_BUF, + S2MPU05_REG_CTRL1, + S2MPU05_REG_CTRL2, + S2MPU05_REG_ETC_TEST, + S2MPU05_REG_OTP_ADRL, + S2MPU05_REG_OTP_ADRH, + S2MPU05_REG_OTP_DATA, + S2MPU05_REG_MON1SEL, + S2MPU05_REG_MON2SEL, + S2MPU05_REG_CTRL3, + S2MPU05_REG_ETC_OTP, + S2MPU05_REG_UVLO, + S2MPU05_REG_TIME_CTRL1, + S2MPU05_REG_TIME_CTRL2, + S2MPU05_REG_B1CTRL1, + S2MPU05_REG_B1CTRL2, + S2MPU05_REG_B2CTRL1, + S2MPU05_REG_B2CTRL2, + S2MPU05_REG_B2CTRL3, + S2MPU05_REG_B2CTRL4, + S2MPU05_REG_B3CTRL1, + S2MPU05_REG_B3CTRL2, + S2MPU05_REG_B3CTRL3, + S2MPU05_REG_B4CTRL1, + S2MPU05_REG_B4CTRL2, + S2MPU05_REG_B5CTRL1, + S2MPU05_REG_B5CTRL2, + S2MPU05_REG_BUCK_RAMP, + S2MPU05_REG_LDO_DVS1, + S2MPU05_REG_LDO_DVS9, + S2MPU05_REG_LDO_DVS10, + S2MPU05_REG_L1CTRL, + S2MPU05_REG_L2CTRL, + S2MPU05_REG_L3CTRL, + S2MPU05_REG_L4CTRL, + S2MPU05_REG_L5CTRL, + S2MPU05_REG_L6CTRL, + S2MPU05_REG_L7CTRL, + S2MPU05_REG_L8CTRL, + S2MPU05_REG_L9CTRL1, + S2MPU05_REG_L9CTRL2, + S2MPU05_REG_L10CTRL, + S2MPU05_REG_L11CTRL1, + S2MPU05_REG_L11CTRL2, + S2MPU05_REG_L12CTRL, + S2MPU05_REG_L13CTRL, + S2MPU05_REG_L14CTRL, + S2MPU05_REG_L15CTRL, + S2MPU05_REG_L16CTRL, + S2MPU05_REG_L17CTRL1, + S2MPU05_REG_L17CTRL2, + S2MPU05_REG_L18CTRL1, + S2MPU05_REG_L18CTRL2, + S2MPU05_REG_L19CTRL, + S2MPU05_REG_L20CTRL, + S2MPU05_REG_L21CTRL, + S2MPU05_REG_L22CTRL, + S2MPU05_REG_L23CTRL, + S2MPU05_REG_L24CTRL, + S2MPU05_REG_L25CTRL, + S2MPU05_REG_L26CTRL, + S2MPU05_REG_L27CTRL, + S2MPU05_REG_L28CTRL, + S2MPU05_REG_L29CTRL, + S2MPU05_REG_L30CTRL, + S2MPU05_REG_L31CTRL, + S2MPU05_REG_L32CTRL, + S2MPU05_REG_L33CTRL, + S2MPU05_REG_L34CTRL, + S2MPU05_REG_L35CTRL, + S2MPU05_REG_LDO_DSCH1, + S2MPU05_REG_LDO_DSCH2, + S2MPU05_REG_LDO_DSCH3, + S2MPU05_REG_LDO_DSCH4, + S2MPU05_REG_LDO_DSCH5, + S2MPU05_REG_LDO_CTRL1, + S2MPU05_REG_LDO_CTRL2, + S2MPU05_REG_TCXO_CTRL, + S2MPU05_REG_SELMIF, +}; + +/* S2MPU05 regulator ids */ +enum S2MPU05_regulators { + S2MPU05_LDO1, + S2MPU05_LDO2, + S2MPU05_LDO3, + S2MPU05_LDO4, + S2MPU05_LDO5, + S2MPU05_LDO6, + S2MPU05_LDO7, + S2MPU05_LDO8, + S2MPU05_LDO9, + S2MPU05_LDO10, + S2MPU05_LDO11, + S2MPU05_LDO12, + S2MPU05_LDO13, + S2MPU05_LDO14, + S2MPU05_LDO15, + S2MPU05_LDO16, + S2MPU05_LDO17, + S2MPU05_LDO18, + S2MPU05_LDO19, + S2MPU05_LDO20, + S2MPU05_LDO21, + S2MPU05_LDO22, + S2MPU05_LDO23, + S2MPU05_LDO24, + S2MPU05_LDO25, + S2MPU05_LDO26, + S2MPU05_LDO27, + S2MPU05_LDO28, + S2MPU05_LDO29, + S2MPU05_LDO30, + S2MPU05_LDO31, + S2MPU05_LDO32, + S2MPU05_LDO33, + S2MPU05_LDO34, + S2MPU05_LDO35, + S2MPU05_BUCK1, + S2MPU05_BUCK2, + S2MPU05_BUCK3, + S2MPU05_BUCK4, + S2MPU05_BUCK5, + + S2MPU05_REGULATOR_MAX, +}; + +#define S2MPU05_SW_ENABLE_MASK 0x03 + +#define S2MPU05_ENABLE_TIME_LDO 128 +#define S2MPU05_ENABLE_TIME_BUCK1 110 +#define S2MPU05_ENABLE_TIME_BUCK2 110 +#define S2MPU05_ENABLE_TIME_BUCK3 110 +#define S2MPU05_ENABLE_TIME_BUCK4 150 +#define S2MPU05_ENABLE_TIME_BUCK5 150 + +#define S2MPU05_LDO_MIN1 800000 +#define S2MPU05_LDO_MIN2 1800000 +#define S2MPU05_LDO_MIN3 400000 +#define S2MPU05_LDO_STEP1 12500 +#define S2MPU05_LDO_STEP2 25000 + +#define S2MPU05_BUCK_MIN1 400000 +#define S2MPU05_BUCK_MIN2 600000 +#define S2MPU05_BUCK_STEP1 6250 +#define S2MPU05_BUCK_STEP2 12500 + +#define S2MPU05_RAMP_DELAY 12000 /* uV/uS */ + +#define S2MPU05_ENABLE_SHIFT 6 +#define S2MPU05_ENABLE_MASK (0x03 << S2MPU05_ENABLE_SHIFT) + +#define S2MPU05_LDO_VSEL_MASK 0x3F +#define S2MPU05_BUCK_VSEL_MASK 0xFF +#define S2MPU05_LDO_N_VOLTAGES (S2MPU05_LDO_VSEL_MASK + 1) +#define S2MPU05_BUCK_N_VOLTAGES (S2MPU05_BUCK_VSEL_MASK + 1) + +#define S2MPU05_PMIC_EN_SHIFT 6 + +#endif /* __LINUX_MFD_S2MPU05_H */ diff --git a/include/linux/mfd/sta2x11-mfd.h b/include/linux/mfd/sta2x11-mfd.h deleted file mode 100644 index 2001ca5c44a9..000000000000 --- a/include/linux/mfd/sta2x11-mfd.h +++ /dev/null @@ -1,506 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2009-2011 Wind River Systems, Inc. - * Copyright (c) 2011 ST Microelectronics (Alessandro Rubini) - * - * The STMicroelectronics ConneXt (STA2X11) chip has several unrelated - * functions in one PCI endpoint functions. This driver simply - * registers the platform devices in this iomemregion and exports a few - * functions to access common registers - */ - -#ifndef __STA2X11_MFD_H -#define __STA2X11_MFD_H -#include <linux/types.h> -#include <linux/pci.h> - -enum sta2x11_mfd_plat_dev { - sta2x11_sctl = 0, - sta2x11_gpio, - sta2x11_scr, - sta2x11_time, - sta2x11_apbreg, - sta2x11_apb_soc_regs, - sta2x11_vic, - sta2x11_n_mfd_plat_devs, -}; - -#define STA2X11_MFD_SCTL_NAME "sta2x11-sctl" -#define STA2X11_MFD_GPIO_NAME "sta2x11-gpio" -#define STA2X11_MFD_SCR_NAME "sta2x11-scr" -#define STA2X11_MFD_TIME_NAME "sta2x11-time" -#define STA2X11_MFD_APBREG_NAME "sta2x11-apbreg" -#define STA2X11_MFD_APB_SOC_REGS_NAME "sta2x11-apb-soc-regs" -#define STA2X11_MFD_VIC_NAME "sta2x11-vic" - -extern u32 -__sta2x11_mfd_mask(struct pci_dev *, u32, u32, u32, enum sta2x11_mfd_plat_dev); - -/* - * The MFD PCI block includes the GPIO peripherals and other register blocks. - * For GPIO, we have 32*4 bits (I use "gsta" for "gpio sta2x11".) - */ -#define GSTA_GPIO_PER_BLOCK 32 -#define GSTA_NR_BLOCKS 4 -#define GSTA_NR_GPIO (GSTA_GPIO_PER_BLOCK * GSTA_NR_BLOCKS) - -/* Pinconfig is set by the board definition: altfunc, pull-up, pull-down */ -struct sta2x11_gpio_pdata { - unsigned pinconfig[GSTA_NR_GPIO]; -}; - -/* Macros below lifted from sh_pfc.h, with minor differences */ -#define PINMUX_TYPE_NONE 0 -#define PINMUX_TYPE_FUNCTION 1 -#define PINMUX_TYPE_OUTPUT_LOW 2 -#define PINMUX_TYPE_OUTPUT_HIGH 3 -#define PINMUX_TYPE_INPUT 4 -#define PINMUX_TYPE_INPUT_PULLUP 5 -#define PINMUX_TYPE_INPUT_PULLDOWN 6 - -/* Give names to GPIO pins, like PXA does, taken from the manual */ -#define STA2X11_GPIO0 0 -#define STA2X11_GPIO1 1 -#define STA2X11_GPIO2 2 -#define STA2X11_GPIO3 3 -#define STA2X11_GPIO4 4 -#define STA2X11_GPIO5 5 -#define STA2X11_GPIO6 6 -#define STA2X11_GPIO7 7 -#define STA2X11_GPIO8_RGBOUT_RED7 8 -#define STA2X11_GPIO9_RGBOUT_RED6 9 -#define STA2X11_GPIO10_RGBOUT_RED5 10 -#define STA2X11_GPIO11_RGBOUT_RED4 11 -#define STA2X11_GPIO12_RGBOUT_RED3 12 -#define STA2X11_GPIO13_RGBOUT_RED2 13 -#define STA2X11_GPIO14_RGBOUT_RED1 14 -#define STA2X11_GPIO15_RGBOUT_RED0 15 -#define STA2X11_GPIO16_RGBOUT_GREEN7 16 -#define STA2X11_GPIO17_RGBOUT_GREEN6 17 -#define STA2X11_GPIO18_RGBOUT_GREEN5 18 -#define STA2X11_GPIO19_RGBOUT_GREEN4 19 -#define STA2X11_GPIO20_RGBOUT_GREEN3 20 -#define STA2X11_GPIO21_RGBOUT_GREEN2 21 -#define STA2X11_GPIO22_RGBOUT_GREEN1 22 -#define STA2X11_GPIO23_RGBOUT_GREEN0 23 -#define STA2X11_GPIO24_RGBOUT_BLUE7 24 -#define STA2X11_GPIO25_RGBOUT_BLUE6 25 -#define STA2X11_GPIO26_RGBOUT_BLUE5 26 -#define STA2X11_GPIO27_RGBOUT_BLUE4 27 -#define STA2X11_GPIO28_RGBOUT_BLUE3 28 -#define STA2X11_GPIO29_RGBOUT_BLUE2 29 -#define STA2X11_GPIO30_RGBOUT_BLUE1 30 -#define STA2X11_GPIO31_RGBOUT_BLUE0 31 -#define STA2X11_GPIO32_RGBOUT_VSYNCH 32 -#define STA2X11_GPIO33_RGBOUT_HSYNCH 33 -#define STA2X11_GPIO34_RGBOUT_DEN 34 -#define STA2X11_GPIO35_ETH_CRS_DV 35 -#define STA2X11_GPIO36_ETH_TXD1 36 -#define STA2X11_GPIO37_ETH_TXD0 37 -#define STA2X11_GPIO38_ETH_TX_EN 38 -#define STA2X11_GPIO39_MDIO 39 -#define STA2X11_GPIO40_ETH_REF_CLK 40 -#define STA2X11_GPIO41_ETH_RXD1 41 -#define STA2X11_GPIO42_ETH_RXD0 42 -#define STA2X11_GPIO43_MDC 43 -#define STA2X11_GPIO44_CAN_TX 44 -#define STA2X11_GPIO45_CAN_RX 45 -#define STA2X11_GPIO46_MLB_DAT 46 -#define STA2X11_GPIO47_MLB_SIG 47 -#define STA2X11_GPIO48_SPI0_CLK 48 -#define STA2X11_GPIO49_SPI0_TXD 49 -#define STA2X11_GPIO50_SPI0_RXD 50 -#define STA2X11_GPIO51_SPI0_FRM 51 -#define STA2X11_GPIO52_SPI1_CLK 52 -#define STA2X11_GPIO53_SPI1_TXD 53 -#define STA2X11_GPIO54_SPI1_RXD 54 -#define STA2X11_GPIO55_SPI1_FRM 55 -#define STA2X11_GPIO56_SPI2_CLK 56 -#define STA2X11_GPIO57_SPI2_TXD 57 -#define STA2X11_GPIO58_SPI2_RXD 58 -#define STA2X11_GPIO59_SPI2_FRM 59 -#define STA2X11_GPIO60_I2C0_SCL 60 -#define STA2X11_GPIO61_I2C0_SDA 61 -#define STA2X11_GPIO62_I2C1_SCL 62 -#define STA2X11_GPIO63_I2C1_SDA 63 -#define STA2X11_GPIO64_I2C2_SCL 64 -#define STA2X11_GPIO65_I2C2_SDA 65 -#define STA2X11_GPIO66_I2C3_SCL 66 -#define STA2X11_GPIO67_I2C3_SDA 67 -#define STA2X11_GPIO68_MSP0_RCK 68 -#define STA2X11_GPIO69_MSP0_RXD 69 -#define STA2X11_GPIO70_MSP0_RFS 70 -#define STA2X11_GPIO71_MSP0_TCK 71 -#define STA2X11_GPIO72_MSP0_TXD 72 -#define STA2X11_GPIO73_MSP0_TFS 73 -#define STA2X11_GPIO74_MSP0_SCK 74 -#define STA2X11_GPIO75_MSP1_CK 75 -#define STA2X11_GPIO76_MSP1_RXD 76 -#define STA2X11_GPIO77_MSP1_FS 77 -#define STA2X11_GPIO78_MSP1_TXD 78 -#define STA2X11_GPIO79_MSP2_CK 79 -#define STA2X11_GPIO80_MSP2_RXD 80 -#define STA2X11_GPIO81_MSP2_FS 81 -#define STA2X11_GPIO82_MSP2_TXD 82 -#define STA2X11_GPIO83_MSP3_CK 83 -#define STA2X11_GPIO84_MSP3_RXD 84 -#define STA2X11_GPIO85_MSP3_FS 85 -#define STA2X11_GPIO86_MSP3_TXD 86 -#define STA2X11_GPIO87_MSP4_CK 87 -#define STA2X11_GPIO88_MSP4_RXD 88 -#define STA2X11_GPIO89_MSP4_FS 89 -#define STA2X11_GPIO90_MSP4_TXD 90 -#define STA2X11_GPIO91_MSP5_CK 91 -#define STA2X11_GPIO92_MSP5_RXD 92 -#define STA2X11_GPIO93_MSP5_FS 93 -#define STA2X11_GPIO94_MSP5_TXD 94 -#define STA2X11_GPIO95_SDIO3_DAT3 95 -#define STA2X11_GPIO96_SDIO3_DAT2 96 -#define STA2X11_GPIO97_SDIO3_DAT1 97 -#define STA2X11_GPIO98_SDIO3_DAT0 98 -#define STA2X11_GPIO99_SDIO3_CLK 99 -#define STA2X11_GPIO100_SDIO3_CMD 100 -#define STA2X11_GPIO101 101 -#define STA2X11_GPIO102 102 -#define STA2X11_GPIO103 103 -#define STA2X11_GPIO104 104 -#define STA2X11_GPIO105_SDIO2_DAT3 105 -#define STA2X11_GPIO106_SDIO2_DAT2 106 -#define STA2X11_GPIO107_SDIO2_DAT1 107 -#define STA2X11_GPIO108_SDIO2_DAT0 108 -#define STA2X11_GPIO109_SDIO2_CLK 109 -#define STA2X11_GPIO110_SDIO2_CMD 110 -#define STA2X11_GPIO111 111 -#define STA2X11_GPIO112 112 -#define STA2X11_GPIO113 113 -#define STA2X11_GPIO114 114 -#define STA2X11_GPIO115_SDIO1_DAT3 115 -#define STA2X11_GPIO116_SDIO1_DAT2 116 -#define STA2X11_GPIO117_SDIO1_DAT1 117 -#define STA2X11_GPIO118_SDIO1_DAT0 118 -#define STA2X11_GPIO119_SDIO1_CLK 119 -#define STA2X11_GPIO120_SDIO1_CMD 120 -#define STA2X11_GPIO121 121 -#define STA2X11_GPIO122 122 -#define STA2X11_GPIO123 123 -#define STA2X11_GPIO124 124 -#define STA2X11_GPIO125_UART2_TXD 125 -#define STA2X11_GPIO126_UART2_RXD 126 -#define STA2X11_GPIO127_UART3_TXD 127 - -/* - * The APB bridge has its own registers, needed by our users as well. - * They are accessed with the following read/mask/write function. - */ -static inline u32 -sta2x11_apbreg_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) -{ - return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apbreg); -} - -/* CAN and MLB */ -#define APBREG_BSR 0x00 /* Bridge Status Reg */ -#define APBREG_PAER 0x08 /* Peripherals Address Error Reg */ -#define APBREG_PWAC 0x20 /* Peripheral Write Access Control reg */ -#define APBREG_PRAC 0x40 /* Peripheral Read Access Control reg */ -#define APBREG_PCG 0x60 /* Peripheral Clock Gating Reg */ -#define APBREG_PUR 0x80 /* Peripheral Under Reset Reg */ -#define APBREG_EMU_PCG 0xA0 /* Emulator Peripheral Clock Gating Reg */ - -#define APBREG_CAN (1 << 1) -#define APBREG_MLB (1 << 3) - -/* SARAC */ -#define APBREG_BSR_SARAC 0x100 /* Bridge Status Reg */ -#define APBREG_PAER_SARAC 0x108 /* Peripherals Address Error Reg */ -#define APBREG_PWAC_SARAC 0x120 /* Peripheral Write Access Control reg */ -#define APBREG_PRAC_SARAC 0x140 /* Peripheral Read Access Control reg */ -#define APBREG_PCG_SARAC 0x160 /* Peripheral Clock Gating Reg */ -#define APBREG_PUR_SARAC 0x180 /* Peripheral Under Reset Reg */ -#define APBREG_EMU_PCG_SARAC 0x1A0 /* Emulator Peripheral Clock Gating Reg */ - -#define APBREG_SARAC (1 << 2) - -/* - * The system controller has its own registers. Some of these are accessed - * by out users as well, using the following read/mask/write/function - */ -static inline -u32 sta2x11_sctl_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) -{ - return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_sctl); -} - -#define SCTL_SCCTL 0x00 /* System controller control register */ -#define SCTL_ARMCFG 0x04 /* ARM configuration register */ -#define SCTL_SCPLLCTL 0x08 /* PLL control status register */ - -#define SCTL_SCPLLCTL_AUDIO_PLL_PD BIT(1) -#define SCTL_SCPLLCTL_FRAC_CONTROL BIT(3) -#define SCTL_SCPLLCTL_STRB_BYPASS BIT(6) -#define SCTL_SCPLLCTL_STRB_INPUT BIT(8) - -#define SCTL_SCPLLFCTRL 0x0c /* PLL frequency control register */ - -#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_MASK 0xff -#define SCTL_SCPLLFCTRL_AUDIO_PLL_NDIV_SHIFT 10 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_MASK 7 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_IDF_SHIFT 21 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_MASK 7 -#define SCTL_SCPLLFCTRL_AUDIO_PLL_ODF_SHIFT 18 -#define SCTL_SCPLLFCTRL_DITHER_DISABLE_MASK 0x03 -#define SCTL_SCPLLFCTRL_DITHER_DISABLE_SHIFT 4 - - -#define SCTL_SCRESFRACT 0x10 /* PLL fractional input register */ - -#define SCTL_SCRESFRACT_MASK 0x0000ffff - - -#define SCTL_SCRESCTRL1 0x14 /* Peripheral reset control 1 */ -#define SCTL_SCRESXTRL2 0x18 /* Peripheral reset control 2 */ -#define SCTL_SCPEREN0 0x1c /* Peripheral clock enable register 0 */ -#define SCTL_SCPEREN1 0x20 /* Peripheral clock enable register 1 */ -#define SCTL_SCPEREN2 0x24 /* Peripheral clock enable register 2 */ -#define SCTL_SCGRST 0x28 /* Peripheral global reset */ -#define SCTL_SCPCIECSBRST 0x2c /* PCIe PAB CSB reset status register */ -#define SCTL_SCPCIPMCR1 0x30 /* PCI power management control 1 */ -#define SCTL_SCPCIPMCR2 0x34 /* PCI power management control 2 */ -#define SCTL_SCPCIPMSR1 0x38 /* PCI power management status 1 */ -#define SCTL_SCPCIPMSR2 0x3c /* PCI power management status 2 */ -#define SCTL_SCPCIPMSR3 0x40 /* PCI power management status 3 */ -#define SCTL_SCINTREN 0x44 /* Interrupt enable */ -#define SCTL_SCRISR 0x48 /* RAW interrupt status */ -#define SCTL_SCCLKSTAT0 0x4c /* Peripheral clocks status 0 */ -#define SCTL_SCCLKSTAT1 0x50 /* Peripheral clocks status 1 */ -#define SCTL_SCCLKSTAT2 0x54 /* Peripheral clocks status 2 */ -#define SCTL_SCRSTSTA 0x58 /* Reset status register */ - -#define SCTL_SCRESCTRL1_USB_PHY_POR (1 << 0) -#define SCTL_SCRESCTRL1_USB_OTG (1 << 1) -#define SCTL_SCRESCTRL1_USB_HRST (1 << 2) -#define SCTL_SCRESCTRL1_USB_PHY_HOST (1 << 3) -#define SCTL_SCRESCTRL1_SATAII (1 << 4) -#define SCTL_SCRESCTRL1_VIP (1 << 5) -#define SCTL_SCRESCTRL1_PER_MMC0 (1 << 6) -#define SCTL_SCRESCTRL1_PER_MMC1 (1 << 7) -#define SCTL_SCRESCTRL1_PER_GPIO0 (1 << 8) -#define SCTL_SCRESCTRL1_PER_GPIO1 (1 << 9) -#define SCTL_SCRESCTRL1_PER_GPIO2 (1 << 10) -#define SCTL_SCRESCTRL1_PER_GPIO3 (1 << 11) -#define SCTL_SCRESCTRL1_PER_MTU0 (1 << 12) -#define SCTL_SCRESCTRL1_KER_SPI0 (1 << 13) -#define SCTL_SCRESCTRL1_KER_SPI1 (1 << 14) -#define SCTL_SCRESCTRL1_KER_SPI2 (1 << 15) -#define SCTL_SCRESCTRL1_KER_MCI0 (1 << 16) -#define SCTL_SCRESCTRL1_KER_MCI1 (1 << 17) -#define SCTL_SCRESCTRL1_PRE_HSI2C0 (1 << 18) -#define SCTL_SCRESCTRL1_PER_HSI2C1 (1 << 19) -#define SCTL_SCRESCTRL1_PER_HSI2C2 (1 << 20) -#define SCTL_SCRESCTRL1_PER_HSI2C3 (1 << 21) -#define SCTL_SCRESCTRL1_PER_MSP0 (1 << 22) -#define SCTL_SCRESCTRL1_PER_MSP1 (1 << 23) -#define SCTL_SCRESCTRL1_PER_MSP2 (1 << 24) -#define SCTL_SCRESCTRL1_PER_MSP3 (1 << 25) -#define SCTL_SCRESCTRL1_PER_MSP4 (1 << 26) -#define SCTL_SCRESCTRL1_PER_MSP5 (1 << 27) -#define SCTL_SCRESCTRL1_PER_MMC (1 << 28) -#define SCTL_SCRESCTRL1_KER_MSP0 (1 << 29) -#define SCTL_SCRESCTRL1_KER_MSP1 (1 << 30) -#define SCTL_SCRESCTRL1_KER_MSP2 (1 << 31) - -#define SCTL_SCPEREN0_UART0 (1 << 0) -#define SCTL_SCPEREN0_UART1 (1 << 1) -#define SCTL_SCPEREN0_UART2 (1 << 2) -#define SCTL_SCPEREN0_UART3 (1 << 3) -#define SCTL_SCPEREN0_MSP0 (1 << 4) -#define SCTL_SCPEREN0_MSP1 (1 << 5) -#define SCTL_SCPEREN0_MSP2 (1 << 6) -#define SCTL_SCPEREN0_MSP3 (1 << 7) -#define SCTL_SCPEREN0_MSP4 (1 << 8) -#define SCTL_SCPEREN0_MSP5 (1 << 9) -#define SCTL_SCPEREN0_SPI0 (1 << 10) -#define SCTL_SCPEREN0_SPI1 (1 << 11) -#define SCTL_SCPEREN0_SPI2 (1 << 12) -#define SCTL_SCPEREN0_I2C0 (1 << 13) -#define SCTL_SCPEREN0_I2C1 (1 << 14) -#define SCTL_SCPEREN0_I2C2 (1 << 15) -#define SCTL_SCPEREN0_I2C3 (1 << 16) -#define SCTL_SCPEREN0_SVDO_LVDS (1 << 17) -#define SCTL_SCPEREN0_USB_HOST (1 << 18) -#define SCTL_SCPEREN0_USB_OTG (1 << 19) -#define SCTL_SCPEREN0_MCI0 (1 << 20) -#define SCTL_SCPEREN0_MCI1 (1 << 21) -#define SCTL_SCPEREN0_MCI2 (1 << 22) -#define SCTL_SCPEREN0_MCI3 (1 << 23) -#define SCTL_SCPEREN0_SATA (1 << 24) -#define SCTL_SCPEREN0_ETHERNET (1 << 25) -#define SCTL_SCPEREN0_VIC (1 << 26) -#define SCTL_SCPEREN0_DMA_AUDIO (1 << 27) -#define SCTL_SCPEREN0_DMA_SOC (1 << 28) -#define SCTL_SCPEREN0_RAM (1 << 29) -#define SCTL_SCPEREN0_VIP (1 << 30) -#define SCTL_SCPEREN0_ARM (1 << 31) - -#define SCTL_SCPEREN1_UART0 (1 << 0) -#define SCTL_SCPEREN1_UART1 (1 << 1) -#define SCTL_SCPEREN1_UART2 (1 << 2) -#define SCTL_SCPEREN1_UART3 (1 << 3) -#define SCTL_SCPEREN1_MSP0 (1 << 4) -#define SCTL_SCPEREN1_MSP1 (1 << 5) -#define SCTL_SCPEREN1_MSP2 (1 << 6) -#define SCTL_SCPEREN1_MSP3 (1 << 7) -#define SCTL_SCPEREN1_MSP4 (1 << 8) -#define SCTL_SCPEREN1_MSP5 (1 << 9) -#define SCTL_SCPEREN1_SPI0 (1 << 10) -#define SCTL_SCPEREN1_SPI1 (1 << 11) -#define SCTL_SCPEREN1_SPI2 (1 << 12) -#define SCTL_SCPEREN1_I2C0 (1 << 13) -#define SCTL_SCPEREN1_I2C1 (1 << 14) -#define SCTL_SCPEREN1_I2C2 (1 << 15) -#define SCTL_SCPEREN1_I2C3 (1 << 16) -#define SCTL_SCPEREN1_USB_PHY (1 << 17) - -/* - * APB-SOC registers - */ -static inline -u32 sta2x11_apb_soc_regs_mask(struct pci_dev *pdev, u32 reg, u32 mask, u32 val) -{ - return __sta2x11_mfd_mask(pdev, reg, mask, val, sta2x11_apb_soc_regs); -} - -#define PCIE_EP1_FUNC3_0_INTR_REG 0x000 -#define PCIE_EP1_FUNC7_4_INTR_REG 0x004 -#define PCIE_EP2_FUNC3_0_INTR_REG 0x008 -#define PCIE_EP2_FUNC7_4_INTR_REG 0x00c -#define PCIE_EP3_FUNC3_0_INTR_REG 0x010 -#define PCIE_EP3_FUNC7_4_INTR_REG 0x014 -#define PCIE_EP4_FUNC3_0_INTR_REG 0x018 -#define PCIE_EP4_FUNC7_4_INTR_REG 0x01c -#define PCIE_INTR_ENABLE0_REG 0x020 -#define PCIE_INTR_ENABLE1_REG 0x024 -#define PCIE_EP1_FUNC_TC_REG 0x028 -#define PCIE_EP2_FUNC_TC_REG 0x02c -#define PCIE_EP3_FUNC_TC_REG 0x030 -#define PCIE_EP4_FUNC_TC_REG 0x034 -#define PCIE_EP1_FUNC_F_REG 0x038 -#define PCIE_EP2_FUNC_F_REG 0x03c -#define PCIE_EP3_FUNC_F_REG 0x040 -#define PCIE_EP4_FUNC_F_REG 0x044 -#define PCIE_PAB_AMBA_SW_RST_REG 0x048 -#define PCIE_PM_STATUS_0_PORT_0_4 0x04c -#define PCIE_PM_STATUS_7_0_EP1 0x050 -#define PCIE_PM_STATUS_7_0_EP2 0x054 -#define PCIE_PM_STATUS_7_0_EP3 0x058 -#define PCIE_PM_STATUS_7_0_EP4 0x05c -#define PCIE_DEV_ID_0_EP1_REG 0x060 -#define PCIE_CC_REV_ID_0_EP1_REG 0x064 -#define PCIE_DEV_ID_1_EP1_REG 0x068 -#define PCIE_CC_REV_ID_1_EP1_REG 0x06c -#define PCIE_DEV_ID_2_EP1_REG 0x070 -#define PCIE_CC_REV_ID_2_EP1_REG 0x074 -#define PCIE_DEV_ID_3_EP1_REG 0x078 -#define PCIE_CC_REV_ID_3_EP1_REG 0x07c -#define PCIE_DEV_ID_4_EP1_REG 0x080 -#define PCIE_CC_REV_ID_4_EP1_REG 0x084 -#define PCIE_DEV_ID_5_EP1_REG 0x088 -#define PCIE_CC_REV_ID_5_EP1_REG 0x08c -#define PCIE_DEV_ID_6_EP1_REG 0x090 -#define PCIE_CC_REV_ID_6_EP1_REG 0x094 -#define PCIE_DEV_ID_7_EP1_REG 0x098 -#define PCIE_CC_REV_ID_7_EP1_REG 0x09c -#define PCIE_DEV_ID_0_EP2_REG 0x0a0 -#define PCIE_CC_REV_ID_0_EP2_REG 0x0a4 -#define PCIE_DEV_ID_1_EP2_REG 0x0a8 -#define PCIE_CC_REV_ID_1_EP2_REG 0x0ac -#define PCIE_DEV_ID_2_EP2_REG 0x0b0 -#define PCIE_CC_REV_ID_2_EP2_REG 0x0b4 -#define PCIE_DEV_ID_3_EP2_REG 0x0b8 -#define PCIE_CC_REV_ID_3_EP2_REG 0x0bc -#define PCIE_DEV_ID_4_EP2_REG 0x0c0 -#define PCIE_CC_REV_ID_4_EP2_REG 0x0c4 -#define PCIE_DEV_ID_5_EP2_REG 0x0c8 -#define PCIE_CC_REV_ID_5_EP2_REG 0x0cc -#define PCIE_DEV_ID_6_EP2_REG 0x0d0 -#define PCIE_CC_REV_ID_6_EP2_REG 0x0d4 -#define PCIE_DEV_ID_7_EP2_REG 0x0d8 -#define PCIE_CC_REV_ID_7_EP2_REG 0x0dC -#define PCIE_DEV_ID_0_EP3_REG 0x0e0 -#define PCIE_CC_REV_ID_0_EP3_REG 0x0e4 -#define PCIE_DEV_ID_1_EP3_REG 0x0e8 -#define PCIE_CC_REV_ID_1_EP3_REG 0x0ec -#define PCIE_DEV_ID_2_EP3_REG 0x0f0 -#define PCIE_CC_REV_ID_2_EP3_REG 0x0f4 -#define PCIE_DEV_ID_3_EP3_REG 0x0f8 -#define PCIE_CC_REV_ID_3_EP3_REG 0x0fc -#define PCIE_DEV_ID_4_EP3_REG 0x100 -#define PCIE_CC_REV_ID_4_EP3_REG 0x104 -#define PCIE_DEV_ID_5_EP3_REG 0x108 -#define PCIE_CC_REV_ID_5_EP3_REG 0x10c -#define PCIE_DEV_ID_6_EP3_REG 0x110 -#define PCIE_CC_REV_ID_6_EP3_REG 0x114 -#define PCIE_DEV_ID_7_EP3_REG 0x118 -#define PCIE_CC_REV_ID_7_EP3_REG 0x11c -#define PCIE_DEV_ID_0_EP4_REG 0x120 -#define PCIE_CC_REV_ID_0_EP4_REG 0x124 -#define PCIE_DEV_ID_1_EP4_REG 0x128 -#define PCIE_CC_REV_ID_1_EP4_REG 0x12c -#define PCIE_DEV_ID_2_EP4_REG 0x130 -#define PCIE_CC_REV_ID_2_EP4_REG 0x134 -#define PCIE_DEV_ID_3_EP4_REG 0x138 -#define PCIE_CC_REV_ID_3_EP4_REG 0x13c -#define PCIE_DEV_ID_4_EP4_REG 0x140 -#define PCIE_CC_REV_ID_4_EP4_REG 0x144 -#define PCIE_DEV_ID_5_EP4_REG 0x148 -#define PCIE_CC_REV_ID_5_EP4_REG 0x14c -#define PCIE_DEV_ID_6_EP4_REG 0x150 -#define PCIE_CC_REV_ID_6_EP4_REG 0x154 -#define PCIE_DEV_ID_7_EP4_REG 0x158 -#define PCIE_CC_REV_ID_7_EP4_REG 0x15c -#define PCIE_SUBSYS_VEN_ID_REG 0x160 -#define PCIE_COMMON_CLOCK_CONFIG_0_4_0 0x164 -#define PCIE_MIPHYP_SSC_EN_REG 0x168 -#define PCIE_MIPHYP_ADDR_REG 0x16c -#define PCIE_L1_ASPM_READY_REG 0x170 -#define PCIE_EXT_CFG_RDY_REG 0x174 -#define PCIE_SoC_INT_ROUTER_STATUS0_REG 0x178 -#define PCIE_SoC_INT_ROUTER_STATUS1_REG 0x17c -#define PCIE_SoC_INT_ROUTER_STATUS2_REG 0x180 -#define PCIE_SoC_INT_ROUTER_STATUS3_REG 0x184 -#define DMA_IP_CTRL_REG 0x324 -#define DISP_BRIDGE_PU_PD_CTRL_REG 0x328 -#define VIP_PU_PD_CTRL_REG 0x32c -#define USB_MLB_PU_PD_CTRL_REG 0x330 -#define SDIO_PU_PD_MISCFUNC_CTRL_REG1 0x334 -#define SDIO_PU_PD_MISCFUNC_CTRL_REG2 0x338 -#define UART_PU_PD_CTRL_REG 0x33c -#define ARM_Lock 0x340 -#define SYS_IO_CHAR_REG1 0x344 -#define SYS_IO_CHAR_REG2 0x348 -#define SATA_CORE_ID_REG 0x34c -#define SATA_CTRL_REG 0x350 -#define I2C_HSFIX_MISC_REG 0x354 -#define SPARE2_RESERVED 0x358 -#define SPARE3_RESERVED 0x35c -#define MASTER_LOCK_REG 0x368 -#define SYSTEM_CONFIG_STATUS_REG 0x36c -#define MSP_CLK_CTRL_REG 0x39c -#define COMPENSATION_REG1 0x3c4 -#define COMPENSATION_REG2 0x3c8 -#define COMPENSATION_REG3 0x3cc -#define TEST_CTL_REG 0x3d0 - -/* - * SECR (OTP) registers - */ -#define STA2X11_SECR_CR 0x00 -#define STA2X11_SECR_FVR0 0x10 -#define STA2X11_SECR_FVR1 0x14 - -extern int sta2x11_mfd_get_regs_data(struct platform_device *pdev, - enum sta2x11_mfd_plat_dev index, - void __iomem **regs, - spinlock_t **lock); - -#endif /* __STA2X11_MFD_H */ diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h index 06d3f11dc3c9..a592c8dc716d 100644 --- a/include/linux/mfd/stm32-lptimer.h +++ b/include/linux/mfd/stm32-lptimer.h @@ -17,20 +17,30 @@ #define STM32_LPTIM_IER 0x08 /* Interrupt Enable Reg */ #define STM32_LPTIM_CFGR 0x0C /* Configuration Reg */ #define STM32_LPTIM_CR 0x10 /* Control Reg */ -#define STM32_LPTIM_CMP 0x14 /* Compare Reg */ +#define STM32_LPTIM_CMP 0x14 /* Compare Reg (MP25 CCR1) */ #define STM32_LPTIM_ARR 0x18 /* Autoreload Reg */ #define STM32_LPTIM_CNT 0x1C /* Counter Reg */ +#define STM32_LPTIM_CCMR1 0x2C /* Capture/Compare Mode MP25 */ +#define STM32_LPTIM_CCR2 0x34 /* Compare Reg2 MP25 */ + +#define STM32_LPTIM_HWCFGR2 0x3EC /* Hardware configuration register 2 - MP25 */ +#define STM32_LPTIM_HWCFGR1 0x3F0 /* Hardware configuration register 1 - MP15 */ +#define STM32_LPTIM_VERR 0x3F4 /* Version identification register - MP15 */ /* STM32_LPTIM_ISR - bit fields */ +#define STM32_LPTIM_DIEROK_ARROK (BIT(24) | BIT(4)) /* MP25 */ +#define STM32_LPTIM_CMP2_ARROK (BIT(19) | BIT(4)) #define STM32_LPTIM_CMPOK_ARROK GENMASK(4, 3) #define STM32_LPTIM_ARROK BIT(4) #define STM32_LPTIM_CMPOK BIT(3) /* STM32_LPTIM_ICR - bit fields */ -#define STM32_LPTIM_ARRMCF BIT(1) +#define STM32_LPTIM_DIEROKCF_ARROKCF (BIT(24) | BIT(4)) /* MP25 */ +#define STM32_LPTIM_CMP2OKCF_ARROKCF (BIT(19) | BIT(4)) #define STM32_LPTIM_CMPOKCF_ARROKCF GENMASK(4, 3) +#define STM32_LPTIM_ARRMCF BIT(1) -/* STM32_LPTIM_IER - bit flieds */ +/* STM32_LPTIM_IER - bit fields */ #define STM32_LPTIM_ARRMIE BIT(1) /* STM32_LPTIM_CR - bit fields */ @@ -53,16 +63,37 @@ /* STM32_LPTIM_ARR */ #define STM32_LPTIM_MAX_ARR 0xFFFF +/* STM32_LPTIM_CCMR1 */ +#define STM32_LPTIM_CC2P GENMASK(19, 18) +#define STM32_LPTIM_CC2E BIT(17) +#define STM32_LPTIM_CC2SEL BIT(16) +#define STM32_LPTIM_CC1P GENMASK(3, 2) +#define STM32_LPTIM_CC1E BIT(1) +#define STM32_LPTIM_CC1SEL BIT(0) + +/* STM32_LPTIM_HWCFGR1 */ +#define STM32_LPTIM_HWCFGR1_ENCODER BIT(16) + +/* STM32_LPTIM_HWCFGR2 */ +#define STM32_LPTIM_HWCFGR2_CHAN_NUM GENMASK(3, 0) + +/* STM32_LPTIM_VERR */ +#define STM32_LPTIM_VERR_23 0x23 /* STM32MP25 */ + /** * struct stm32_lptimer - STM32 Low-Power Timer data assigned by parent device * @clk: clock reference for this instance * @regmap: register map reference for this instance * @has_encoder: indicates this Low-Power Timer supports encoder mode + * @num_cc_chans: indicates the number of capture/compare channels + * @version: indicates the major and minor revision of the controller */ struct stm32_lptimer { struct clk *clk; struct regmap *regmap; bool has_encoder; + unsigned int num_cc_chans; + u32 version; }; #endif diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index f09ba598c97a..23b0cae4a9f8 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -33,6 +33,9 @@ #define TIM_DCR 0x48 /* DMA control register */ #define TIM_DMAR 0x4C /* DMA register for transfer */ #define TIM_TISEL 0x68 /* Input Selection */ +#define TIM_HWCFGR2 0x3EC /* hardware configuration 2 Reg (MP25) */ +#define TIM_HWCFGR1 0x3F0 /* hardware configuration 1 Reg (MP25) */ +#define TIM_IPIDR 0x3F8 /* IP identification Reg (MP25) */ #define TIM_CR1_CEN BIT(0) /* Counter Enable */ #define TIM_CR1_DIR BIT(4) /* Counter Direction */ @@ -100,6 +103,9 @@ #define TIM_BDTR_BKF(x) (0xf << (16 + (x) * 4)) #define TIM_DCR_DBA GENMASK(4, 0) /* DMA base addr */ #define TIM_DCR_DBL GENMASK(12, 8) /* DMA burst len */ +#define TIM_HWCFGR1_NB_OF_CC GENMASK(3, 0) /* Capture/compare channels */ +#define TIM_HWCFGR1_NB_OF_DT GENMASK(7, 4) /* Complementary outputs & dead-time generators */ +#define TIM_HWCFGR2_CNT_WIDTH GENMASK(15, 8) /* Counter width */ #define MAX_TIM_PSC 0xFFFF #define MAX_TIM_ICPSC 0x3 @@ -113,6 +119,8 @@ #define TIM_BDTR_BKF_MASK 0xF #define TIM_BDTR_BKF_SHIFT(x) (16 + (x) * 4) +#define STM32MP25_TIM_IPIDR 0x00120002 + enum stm32_timers_dmas { STM32_TIMERS_DMA_CH1, STM32_TIMERS_DMA_CH2, @@ -151,6 +159,7 @@ struct stm32_timers_dma { struct stm32_timers { struct clk *clk; + u32 ipidr; struct regmap *regmap; u32 max_arr; struct stm32_timers_dma dma; /* Only to be used by the parent */ diff --git a/include/linux/mfd/tps65219.h b/include/linux/mfd/tps65219.h index e6826e34e2a6..3e8d29189267 100644 --- a/include/linux/mfd/tps65219.h +++ b/include/linux/mfd/tps65219.h @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Functions to access TPS65219 Power Management IC. + * Functions to access TPS65215/TPS65219 Power Management Integrated Chips * * Copyright (C) 2022 BayLibre Incorporated - https://www.baylibre.com/ + * Copyright (C) 2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef MFD_TPS65219_H @@ -10,16 +11,15 @@ #include <linux/bitops.h> #include <linux/notifier.h> +#include <linux/regmap.h> #include <linux/regulator/driver.h> -struct regmap; -struct regmap_irq_chip_data; - -#define TPS65219_1V35 1350000 -#define TPS65219_1V8 1800000 - -/* TPS chip id list */ -#define TPS65219 0xF0 +/* Chip id list*/ +enum pmic_id { + TPS65214, + TPS65215, + TPS65219, +}; /* I2C ID for TPS65219 part */ #define TPS65219_I2C_ID 0x24 @@ -29,15 +29,23 @@ struct regmap_irq_chip_data; #define TPS65219_REG_NVM_ID 0x01 #define TPS65219_REG_ENABLE_CTRL 0x02 #define TPS65219_REG_BUCKS_CONFIG 0x03 +#define TPS65214_REG_LOCK 0x03 #define TPS65219_REG_LDO4_VOUT 0x04 +#define TPS65214_REG_LDO1_VOUT_STBY 0x04 #define TPS65219_REG_LDO3_VOUT 0x05 +#define TPS65215_REG_LDO2_VOUT 0x05 +#define TPS65214_REG_LDO1_VOUT 0x05 #define TPS65219_REG_LDO2_VOUT 0x06 +#define TPS65214_REG_LDO2_VOUT 0x06 #define TPS65219_REG_LDO1_VOUT 0x07 +#define TPS65214_REG_LDO2_VOUT_STBY 0x07 #define TPS65219_REG_BUCK3_VOUT 0x8 #define TPS65219_REG_BUCK2_VOUT 0x9 #define TPS65219_REG_BUCK1_VOUT 0xA #define TPS65219_REG_LDO4_SEQUENCE_SLOT 0xB #define TPS65219_REG_LDO3_SEQUENCE_SLOT 0xC +#define TPS65215_REG_LDO2_SEQUENCE_SLOT 0xC +#define TPS65214_REG_LDO1_SEQUENCE_SLOT 0xC #define TPS65219_REG_LDO2_SEQUENCE_SLOT 0xD #define TPS65219_REG_LDO1_SEQUENCE_SLOT 0xE #define TPS65219_REG_BUCK3_SEQUENCE_SLOT 0xF @@ -46,15 +54,21 @@ struct regmap_irq_chip_data; #define TPS65219_REG_nRST_SEQUENCE_SLOT 0x12 #define TPS65219_REG_GPIO_SEQUENCE_SLOT 0x13 #define TPS65219_REG_GPO2_SEQUENCE_SLOT 0x14 +#define TPS65214_REG_GPIO_GPI_SEQUENCE_SLOT 0x14 #define TPS65219_REG_GPO1_SEQUENCE_SLOT 0x15 +#define TPS65214_REG_GPO_SEQUENCE_SLOT 0x15 #define TPS65219_REG_POWER_UP_SLOT_DURATION_1 0x16 #define TPS65219_REG_POWER_UP_SLOT_DURATION_2 0x17 +/* _SLOT_DURATION_3 doesn't apply to TPS65215*/ #define TPS65219_REG_POWER_UP_SLOT_DURATION_3 0x18 #define TPS65219_REG_POWER_UP_SLOT_DURATION_4 0x19 +#define TPS65214_REG_BUCK3_VOUT_STBY 0x19 #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_1 0x1A #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_2 0x1B #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_3 0x1C +#define TPS65214_REG_BUCK2_VOUT_STBY 0x1C #define TPS65219_REG_POWER_DOWN_SLOT_DURATION_4 0x1D +#define TPS65214_REG_BUCK1_VOUT_STBY 0x1D #define TPS65219_REG_GENERAL_CONFIG 0x1E #define TPS65219_REG_MFP_1_CONFIG 0x1F #define TPS65219_REG_MFP_2_CONFIG 0x20 @@ -72,9 +86,19 @@ struct regmap_irq_chip_data; #define TPS65219_REG_DISCHARGE_CONFIG 0x2A /* main irq registers */ #define TPS65219_REG_INT_SOURCE 0x2B -/* 'sub irq' registers */ + +/* TPS65219 'sub irq' registers */ #define TPS65219_REG_INT_LDO_3_4 0x2C #define TPS65219_REG_INT_LDO_1_2 0x2D + +/* TPS65215 specific 'sub irq' registers */ +#define TPS65215_REG_INT_LDO_2 0x2C +#define TPS65215_REG_INT_LDO_1 0x2D + +/* TPS65214 specific 'sub irq' register */ +#define TPS65214_REG_INT_LDO_1_2 0x2D + +/* Common TPS65215 & TPS65219 'sub irq' registers */ #define TPS65219_REG_INT_BUCK_3 0x2E #define TPS65219_REG_INT_BUCK_1_2 0x2F #define TPS65219_REG_INT_SYSTEM 0x30 @@ -91,6 +115,17 @@ struct regmap_irq_chip_data; #define TPS65219_REG_INT_TO_RV_POS 6 #define TPS65219_REG_INT_PB_POS 7 +#define TPS65215_REG_INT_LDO_2_POS 0 +#define TPS65215_REG_INT_LDO_1_POS 1 + +#define TPS65214_REG_INT_LDO_1_2_POS 0 +#define TPS65214_REG_INT_BUCK_3_POS 1 +#define TPS65214_REG_INT_BUCK_1_2_POS 2 +#define TPS65214_REG_INT_SYS_POS 3 +#define TPS65214_REG_INT_RV_POS 4 +#define TPS65214_REG_INT_TO_RV_POS 5 +#define TPS65214_REG_INT_PB_POS 6 + #define TPS65219_REG_USER_NVM_CMD 0x34 #define TPS65219_REG_POWER_UP_STATUS 0x35 #define TPS65219_REG_SPARE_2 0x36 @@ -112,6 +147,8 @@ struct regmap_irq_chip_data; #define TPS65219_ENABLE_LDO1_EN_MASK BIT(3) #define TPS65219_ENABLE_LDO2_EN_MASK BIT(4) #define TPS65219_ENABLE_LDO3_EN_MASK BIT(5) +#define TPS65215_ENABLE_LDO2_EN_MASK BIT(5) +#define TPS65214_ENABLE_LDO1_EN_MASK BIT(5) #define TPS65219_ENABLE_LDO4_EN_MASK BIT(6) /* power ON-OFF sequence slot */ #define TPS65219_BUCKS_LDOS_SEQUENCE_OFF_SLOT_MASK GENMASK(3, 0) @@ -163,20 +200,27 @@ struct regmap_irq_chip_data; #define TPS65219_REG_MASK_EFFECT_MASK GENMASK(2, 1) #define TPS65219_REG_MASK_INT_FOR_PB_MASK BIT(7) /* UnderVoltage - Short to GND - OverCurrent*/ -/* LDO3-4 */ +/* LDO3-4: only for TPS65219*/ #define TPS65219_INT_LDO3_SCG_MASK BIT(0) #define TPS65219_INT_LDO3_OC_MASK BIT(1) #define TPS65219_INT_LDO3_UV_MASK BIT(2) #define TPS65219_INT_LDO4_SCG_MASK BIT(3) #define TPS65219_INT_LDO4_OC_MASK BIT(4) #define TPS65219_INT_LDO4_UV_MASK BIT(5) -/* LDO1-2 */ +/* LDO1-2: TPS65214 & TPS65219 */ #define TPS65219_INT_LDO1_SCG_MASK BIT(0) #define TPS65219_INT_LDO1_OC_MASK BIT(1) #define TPS65219_INT_LDO1_UV_MASK BIT(2) #define TPS65219_INT_LDO2_SCG_MASK BIT(3) #define TPS65219_INT_LDO2_OC_MASK BIT(4) #define TPS65219_INT_LDO2_UV_MASK BIT(5) +/* TPS65215 LDO1-2*/ +#define TPS65215_INT_LDO1_SCG_MASK BIT(0) +#define TPS65215_INT_LDO1_OC_MASK BIT(1) +#define TPS65215_INT_LDO1_UV_MASK BIT(2) +#define TPS65215_INT_LDO2_SCG_MASK BIT(0) +#define TPS65215_INT_LDO2_OC_MASK BIT(1) +#define TPS65215_INT_LDO2_UV_MASK BIT(2) /* BUCK3 */ #define TPS65219_INT_BUCK3_SCG_MASK BIT(0) #define TPS65219_INT_BUCK3_OC_MASK BIT(1) @@ -191,12 +235,13 @@ struct regmap_irq_chip_data; #define TPS65219_INT_BUCK2_OC_MASK BIT(5) #define TPS65219_INT_BUCK2_NEG_OC_MASK BIT(6) #define TPS65219_INT_BUCK2_UV_MASK BIT(7) -/* Thermal Sensor */ +/* Thermal Sensor: TPS65219/TPS65215 */ #define TPS65219_INT_SENSOR_3_WARM_MASK BIT(0) +#define TPS65219_INT_SENSOR_3_HOT_MASK BIT(4) +/* Thermal Sensor: TPS65219/TPS65215/TPS65214 */ #define TPS65219_INT_SENSOR_2_WARM_MASK BIT(1) #define TPS65219_INT_SENSOR_1_WARM_MASK BIT(2) #define TPS65219_INT_SENSOR_0_WARM_MASK BIT(3) -#define TPS65219_INT_SENSOR_3_HOT_MASK BIT(4) #define TPS65219_INT_SENSOR_2_HOT_MASK BIT(5) #define TPS65219_INT_SENSOR_1_HOT_MASK BIT(6) #define TPS65219_INT_SENSOR_0_HOT_MASK BIT(7) @@ -207,6 +252,8 @@ struct regmap_irq_chip_data; #define TPS65219_INT_LDO1_RV_MASK BIT(3) #define TPS65219_INT_LDO2_RV_MASK BIT(4) #define TPS65219_INT_LDO3_RV_MASK BIT(5) +#define TPS65215_INT_LDO2_RV_MASK BIT(5) +#define TPS65214_INT_LDO2_RV_MASK BIT(5) #define TPS65219_INT_LDO4_RV_MASK BIT(6) /* Residual Voltage ShutDown */ #define TPS65219_INT_BUCK1_RV_SD_MASK BIT(0) @@ -215,6 +262,8 @@ struct regmap_irq_chip_data; #define TPS65219_INT_LDO1_RV_SD_MASK BIT(3) #define TPS65219_INT_LDO2_RV_SD_MASK BIT(4) #define TPS65219_INT_LDO3_RV_SD_MASK BIT(5) +#define TPS65215_INT_LDO2_RV_SD_MASK BIT(5) +#define TPS65214_INT_LDO1_RV_SD_MASK BIT(5) #define TPS65219_INT_LDO4_RV_SD_MASK BIT(6) #define TPS65219_INT_TIMEOUT_MASK BIT(7) /* Power Button */ @@ -240,7 +289,15 @@ enum { TPS65219_INT_LDO4_SCG, TPS65219_INT_LDO4_OC, TPS65219_INT_LDO4_UV, - /* LDO1-2 */ + /* TPS65215 LDO1*/ + TPS65215_INT_LDO1_SCG, + TPS65215_INT_LDO1_OC, + TPS65215_INT_LDO1_UV, + /* TPS65215 LDO2*/ + TPS65215_INT_LDO2_SCG, + TPS65215_INT_LDO2_OC, + TPS65215_INT_LDO2_UV, + /* LDO1-2: TPS65219/TPS65214 */ TPS65219_INT_LDO1_SCG, TPS65219_INT_LDO1_OC, TPS65219_INT_LDO1_UV, @@ -276,6 +333,8 @@ enum { TPS65219_INT_BUCK3_RV, TPS65219_INT_LDO1_RV, TPS65219_INT_LDO2_RV, + TPS65215_INT_LDO2_RV, + TPS65214_INT_LDO2_RV, TPS65219_INT_LDO3_RV, TPS65219_INT_LDO4_RV, /* Residual Voltage ShutDown */ @@ -283,6 +342,8 @@ enum { TPS65219_INT_BUCK2_RV_SD, TPS65219_INT_BUCK3_RV_SD, TPS65219_INT_LDO1_RV_SD, + TPS65214_INT_LDO1_RV_SD, + TPS65215_INT_LDO2_RV_SD, TPS65219_INT_LDO2_RV_SD, TPS65219_INT_LDO3_RV_SD, TPS65219_INT_LDO4_RV_SD, @@ -292,6 +353,23 @@ enum { TPS65219_INT_PB_RISING_EDGE_DETECT, }; +enum tps65214_regulator_id { + /* + * DCDC's same as TPS65219 + * LDO1 maps to TPS65219's LDO3 + * LDO2 is the same as TPS65219 + * + */ + TPS65214_LDO_1 = 3, + TPS65214_LDO_2 = 4, +}; + +enum tps65215_regulator_id { + /* DCDC's same as TPS65219 */ + /* LDO1 is the same as TPS65219 */ + TPS65215_LDO_2 = 4, +}; + enum tps65219_regulator_id { /* DCDC's */ TPS65219_BUCK_1, @@ -305,11 +383,40 @@ enum tps65219_regulator_id { }; /* Number of step-down converters available */ -#define TPS65219_NUM_DCDC 3 +#define TPS6521X_NUM_BUCKS 3 /* Number of LDO voltage regulators available */ #define TPS65219_NUM_LDO 4 +#define TPS65215_NUM_LDO 2 +#define TPS65214_NUM_LDO 2 /* Number of total regulators available */ -#define TPS65219_NUM_REGULATOR (TPS65219_NUM_DCDC + TPS65219_NUM_LDO) +#define TPS65219_NUM_REGULATOR (TPS6521X_NUM_BUCKS + TPS65219_NUM_LDO) +#define TPS65215_NUM_REGULATOR (TPS6521X_NUM_BUCKS + TPS65215_NUM_LDO) +#define TPS65214_NUM_REGULATOR (TPS6521X_NUM_BUCKS + TPS65214_NUM_LDO) + +/* Define the TPS65214 IRQ numbers */ +enum tps65214_irqs { + /* INT source registers */ + TPS65214_TO_RV_SD_SET_IRQ, + TPS65214_RV_SET_IRQ, + TPS65214_SYS_SET_IRQ, + TPS65214_BUCK_1_2_SET_IRQ, + TPS65214_BUCK_3_SET_IRQ, + TPS65214_LDO_1_2_SET_IRQ, + TPS65214_PB_SET_IRQ = 7, +}; + +/* Define the TPS65215 IRQ numbers */ +enum tps65215_irqs { + /* INT source registers */ + TPS65215_TO_RV_SD_SET_IRQ, + TPS65215_RV_SET_IRQ, + TPS65215_SYS_SET_IRQ, + TPS65215_BUCK_1_2_SET_IRQ, + TPS65215_BUCK_3_SET_IRQ, + TPS65215_LDO_1_SET_IRQ, + TPS65215_LDO_2_SET_IRQ, + TPS65215_PB_SET_IRQ, +}; /* Define the TPS65219 IRQ numbers */ enum tps65219_irqs { @@ -331,6 +438,7 @@ enum tps65219_irqs { * * @dev: MFD device * @regmap: Regmap for accessing the device registers + * @chip_id: Chip ID * @irq_data: Regmap irq data used for the irq chip * @nb: notifier block for the restart handler */ @@ -338,6 +446,7 @@ struct tps65219 { struct device *dev; struct regmap *regmap; + unsigned int chip_id; struct regmap_irq_chip_data *irq_data; struct notifier_block nb; }; diff --git a/include/linux/mfd/upboard-fpga.h b/include/linux/mfd/upboard-fpga.h new file mode 100644 index 000000000000..12231e40f5da --- /dev/null +++ b/include/linux/mfd/upboard-fpga.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * UP Board CPLD/FPGA driver + * + * Copyright (c) AAEON. All rights reserved. + * Copyright (C) 2024 Bootlin + * + * Author: Gary Wang <garywang@aaeon.com.tw> + * Author: Thomas Richard <thomas.richard@bootlin.com> + * + */ + +#ifndef __LINUX_MFD_UPBOARD_FPGA_H +#define __LINUX_MFD_UPBOARD_FPGA_H + +#define UPBOARD_REGISTER_SIZE 16 + +enum upboard_fpgareg { + UPBOARD_REG_PLATFORM_ID = 0x10, + UPBOARD_REG_FIRMWARE_ID = 0x11, + UPBOARD_REG_FUNC_EN0 = 0x20, + UPBOARD_REG_FUNC_EN1 = 0x21, + UPBOARD_REG_GPIO_EN0 = 0x30, + UPBOARD_REG_GPIO_EN1 = 0x31, + UPBOARD_REG_GPIO_EN2 = 0x32, + UPBOARD_REG_GPIO_DIR0 = 0x40, + UPBOARD_REG_GPIO_DIR1 = 0x41, + UPBOARD_REG_GPIO_DIR2 = 0x42, + UPBOARD_REG_MAX, +}; + +enum upboard_fpga_type { + UPBOARD_UP_FPGA, + UPBOARD_UP2_FPGA, +}; + +struct upboard_fpga_data { + enum upboard_fpga_type type; + const struct regmap_config *regmap_config; +}; + +struct upboard_fpga { + struct device *dev; + struct regmap *regmap; + struct gpio_desc *enable_gpio; + struct gpio_desc *reset_gpio; + struct gpio_desc *clear_gpio; + struct gpio_desc *strobe_gpio; + struct gpio_desc *datain_gpio; + struct gpio_desc *dataout_gpio; + unsigned int firmware_version; + const struct upboard_fpga_data *fpga_data; +}; + +#endif /* __LINUX_MFD_UPBOARD_FPGA_H */ diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 059dc94d20bb..dd372b0123a6 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -721,12 +721,6 @@ enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl); void mhi_soc_reset(struct mhi_controller *mhi_cntrl); /** - * mhi_device_get - Disable device low power mode - * @mhi_dev: Device associated with the channel - */ -void mhi_device_get(struct mhi_device *mhi_dev); - -/** * mhi_device_get_sync - Disable device low power mode. Synchronously * take the controller out of suspended state * @mhi_dev: Device associated with the channel @@ -777,18 +771,6 @@ int mhi_prepare_for_transfer_autoqueue(struct mhi_device *mhi_dev); void mhi_unprepare_from_transfer(struct mhi_device *mhi_dev); /** - * mhi_queue_dma - Send or receive DMA mapped buffers from client device - * over MHI channel - * @mhi_dev: Device associated with the channels - * @dir: DMA direction for the channel - * @mhi_buf: Buffer for holding the DMA mapped data - * @len: Buffer length - * @mflags: MHI transfer flags used for the transfer - */ -int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir, - struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags); - -/** * mhi_queue_buf - Send or receive raw buffers from client device over MHI * channel * @mhi_dev: Device associated with the channels diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h index 591bf5b5e8dc..9af01bdd86d2 100644 --- a/include/linux/micrel_phy.h +++ b/include/linux/micrel_phy.h @@ -44,7 +44,6 @@ #define MICREL_PHY_50MHZ_CLK BIT(0) #define MICREL_PHY_FXEN BIT(1) #define MICREL_KSZ8_P1_ERRATA BIT(2) -#define MICREL_NO_EEE BIT(3) #define MICREL_KSZ9021_EXTREG_CTRL 0xB #define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 002e49b2ebd9..aaa2114498d6 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -144,16 +144,14 @@ const struct movable_operations *page_movable_ops(struct page *page) #ifdef CONFIG_NUMA_BALANCING int migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node); -int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, - int node); +int migrate_misplaced_folio(struct folio *folio, int node); #else static inline int migrate_misplaced_folio_prepare(struct folio *folio, struct vm_area_struct *vma, int node) { return -EAGAIN; /* can't migrate now */ } -static inline int migrate_misplaced_folio(struct folio *folio, - struct vm_area_struct *vma, int node) +static inline int migrate_misplaced_folio(struct folio *folio, int node) { return -EAGAIN; /* can't migrate now */ } @@ -207,8 +205,8 @@ struct migrate_vma { unsigned long end; /* - * Set to the owner value also stored in page->pgmap->owner for - * migrating out of device private memory. The flags also need to + * Set to the owner value also stored in page_pgmap(page)->owner + * for migrating out of device private memory. The flags also need to * be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE. * The caller should always set this field when using mmu notifier * callbacks to avoid device MMU invalidations for device private @@ -229,6 +227,7 @@ void migrate_vma_pages(struct migrate_vma *migrate); void migrate_vma_finalize(struct migrate_vma *migrate); int migrate_device_range(unsigned long *src_pfns, unsigned long start, unsigned long npages); +int migrate_device_pfns(unsigned long *src_pfns, unsigned long npages); void migrate_device_pages(unsigned long *src_pfns, unsigned long *dst_pfns, unsigned long npages); void migrate_device_finalize(unsigned long *src_pfns, diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index e781727c8916..79ddc0adbf2b 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -6,6 +6,17 @@ #include <linux/string.h> #include <linux/types.h> +/* + * The Min Heap API provides utilities for managing min-heaps, a binary tree + * structure where each node's value is less than or equal to its children's + * values, ensuring the smallest element is at the root. + * + * Users should avoid directly calling functions prefixed with __min_heap_*(). + * Instead, use the provided macro wrappers. + * + * For further details and examples, refer to Documentation/core-api/min_heap.rst. + */ + /** * Data structure to hold a min-heap. * @nr: Number of elements currently in the heap. @@ -15,8 +26,8 @@ */ #define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \ struct _name { \ - int nr; \ - int size; \ + size_t nr; \ + size_t size; \ _type *data; \ _type preallocated[_nr]; \ } @@ -207,7 +218,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size) /* Initialize a min-heap. */ static __always_inline -void __min_heap_init_inline(min_heap_char *heap, void *data, int size) +void __min_heap_init_inline(min_heap_char *heap, void *data, size_t size) { heap->nr = 0; heap->size = size; @@ -218,7 +229,7 @@ void __min_heap_init_inline(min_heap_char *heap, void *data, int size) } #define min_heap_init_inline(_heap, _data, _size) \ - __min_heap_init_inline((min_heap_char *)_heap, _data, _size) + __min_heap_init_inline(container_of(&(_heap)->nr, min_heap_char, nr), _data, _size) /* Get the minimum element from the heap. */ static __always_inline @@ -228,7 +239,8 @@ void *__min_heap_peek_inline(struct min_heap_char *heap) } #define min_heap_peek_inline(_heap) \ - (__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap)) + (__minheap_cast(_heap) \ + __min_heap_peek_inline(container_of(&(_heap)->nr, min_heap_char, nr))) /* Check if the heap is full. */ static __always_inline @@ -238,11 +250,11 @@ bool __min_heap_full_inline(min_heap_char *heap) } #define min_heap_full_inline(_heap) \ - __min_heap_full_inline((min_heap_char *)_heap) + __min_heap_full_inline(container_of(&(_heap)->nr, min_heap_char, nr)) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down_inline(min_heap_char *heap, size_t pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { const unsigned long lsbit = elem_size & -elem_size; @@ -277,8 +289,8 @@ void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, } #define min_heap_sift_down_inline(_heap, _pos, _func, _args) \ - __min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \ - _func, _args) + __min_heap_sift_down_inline(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ + __minheap_obj_size(_heap), _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline @@ -304,22 +316,23 @@ void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx } #define min_heap_sift_up_inline(_heap, _idx, _func, _args) \ - __min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ - _func, _args) + __min_heap_sift_up_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) /* Floyd's approach to heapification that is O(nr). */ static __always_inline void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func, void *args) { - int i; + ssize_t i; for (i = heap->nr / 2 - 1; i >= 0; i--) __min_heap_sift_down_inline(heap, i, elem_size, func, args); } #define min_heapify_all_inline(_heap, _func, _args) \ - __min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heapify_all_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline @@ -340,7 +353,8 @@ bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, } #define min_heap_pop_inline(_heap, _func, _args) \ - __min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heap_pop_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -356,8 +370,8 @@ void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t } #define min_heap_pop_push_inline(_heap, _element, _func, _args) \ - __min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ - _func, _args) + __min_heap_pop_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) /* Push an element on to the heap, O(log2(nr)). */ static __always_inline @@ -365,7 +379,7 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele const struct min_heap_callbacks *func, void *args) { void *data = heap->data; - int pos; + size_t pos; if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) return false; @@ -382,8 +396,8 @@ bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t ele } #define min_heap_push_inline(_heap, _element, _func, _args) \ - __min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ - _func, _args) + __min_heap_push_inline(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) /* Remove ith element from the heap, O(log2(nr)). */ static __always_inline @@ -411,13 +425,13 @@ bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, } #define min_heap_del_inline(_heap, _idx, _func, _args) \ - __min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ - _func, _args) + __min_heap_del_inline(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) -void __min_heap_init(min_heap_char *heap, void *data, int size); +void __min_heap_init(min_heap_char *heap, void *data, size_t size); void *__min_heap_peek(struct min_heap_char *heap); bool __min_heap_full(min_heap_char *heap); -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down(min_heap_char *heap, size_t pos, size_t elem_size, const struct min_heap_callbacks *func, void *args); void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args); @@ -433,25 +447,31 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, const struct min_heap_callbacks *func, void *args); #define min_heap_init(_heap, _data, _size) \ - __min_heap_init((min_heap_char *)_heap, _data, _size) + __min_heap_init(container_of(&(_heap)->nr, min_heap_char, nr), _data, _size) #define min_heap_peek(_heap) \ - (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) + (__minheap_cast(_heap) __min_heap_peek(container_of(&(_heap)->nr, min_heap_char, nr))) #define min_heap_full(_heap) \ - __min_heap_full((min_heap_char *)_heap) + __min_heap_full(container_of(&(_heap)->nr, min_heap_char, nr)) #define min_heap_sift_down(_heap, _pos, _func, _args) \ - __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) + __min_heap_sift_down(container_of(&(_heap)->nr, min_heap_char, nr), _pos, \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_sift_up(_heap, _idx, _func, _args) \ - __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + __min_heap_sift_up(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) #define min_heapify_all(_heap, _func, _args) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heapify_all(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_pop(_heap, _func, _args) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) + __min_heap_pop(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_pop_push(_heap, _element, _func, _args) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ - _func, _args) + __min_heap_pop_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_push(_heap, _element, _func, _args) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) + __min_heap_push(container_of(&(_heap)->nr, min_heap_char, nr), _element, \ + __minheap_obj_size(_heap), _func, _args) #define min_heap_del(_heap, _idx, _func, _args) \ - __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + __min_heap_del(container_of(&(_heap)->nr, min_heap_char, nr), \ + __minheap_obj_size(_heap), _idx, _func, _args) #endif /* _LINUX_MIN_HEAP_H */ diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 98008dd92153..eaaf5c008e4d 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -8,13 +8,10 @@ #include <linux/types.h> /* - * min()/max()/clamp() macros must accomplish three things: + * min()/max()/clamp() macros must accomplish several things: * * - Avoid multiple evaluations of the arguments (so side-effects like * "x++" happen only once) when non-constant. - * - Retain result as a constant expressions when called with only - * constant expressions (to avoid tripping VLA warnings in stack - * allocation usage). * - Perform signed v unsigned type-checking (to generate compile * errors instead of nasty runtime surprises). * - Unsigned char/short are always promoted to signed int and can be @@ -31,58 +28,54 @@ * bit #0 set if ok for unsigned comparisons * bit #1 set if ok for signed comparisons * - * In particular, statically non-negative signed integer - * expressions are ok for both. + * In particular, statically non-negative signed integer expressions + * are ok for both. * - * NOTE! Unsigned types smaller than 'int' are implicitly - * converted to 'int' in expressions, and are accepted for - * signed conversions for now. This is debatable. + * NOTE! Unsigned types smaller than 'int' are implicitly converted to 'int' + * in expressions, and are accepted for signed conversions for now. + * This is debatable. * - * Note that 'x' is the original expression, and 'ux' is - * the unique variable that contains the value. + * Note that 'x' is the original expression, and 'ux' is the unique variable + * that contains the value. * - * We use 'ux' for pure type checking, and 'x' for when - * we need to look at the value (but without evaluating - * it for side effects! Careful to only ever evaluate it - * with sizeof() or __builtin_constant_p() etc). + * We use 'ux' for pure type checking, and 'x' for when we need to look at the + * value (but without evaluating it for side effects! + * Careful to only ever evaluate it with sizeof() or __builtin_constant_p() etc). * - * Pointers end up being checked by the normal C type - * rules at the actual comparison, and these expressions - * only need to be careful to not cause warnings for - * pointer use. + * Pointers end up being checked by the normal C type rules at the actual + * comparison, and these expressions only need to be careful to not cause + * warnings for pointer use. */ -#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) -#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) -#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ - __signed_type_use(x,ux):__unsigned_type_use(x,ux)) +#define __sign_use(ux) (is_signed_type(typeof(ux)) ? \ + (2 + __is_nonneg(ux)) : (1 + 2 * (sizeof(ux) < 4))) /* - * To avoid warnings about casting pointers to integers - * of different sizes, we need that special sign type. + * Check whether a signed value is always non-negative. * - * On 64-bit we can just always use 'long', since any - * integer or pointer type can just be cast to that. + * A cast is needed to avoid any warnings from values that aren't signed + * integer types (in which case the result doesn't matter). * - * This does not work for 128-bit signed integers since - * the cast would truncate them, but we do not use s128 - * types in the kernel (we do use 'u128', but they will - * be handled by the !is_signed_type() case). + * On 64-bit any integer or pointer type can safely be cast to 'long long'. + * But on 32-bit we need to avoid warnings about casting pointers to integers + * of different sizes without truncating 64-bit values so 'long' or 'long long' + * must be used depending on the size of the value. * - * NOTE! The cast is there only to avoid any warnings - * from when values that aren't signed integer types. + * This does not work for 128-bit signed integers since the cast would truncate + * them, but we do not use s128 types in the kernel (we do use 'u128', + * but they are handled by the !is_signed_type() case). */ -#ifdef CONFIG_64BIT - #define __signed_type(ux) long +#if __SIZEOF_POINTER__ == __SIZEOF_LONG_LONG__ +#define __is_nonneg(ux) statically_true((long long)(ux) >= 0) #else - #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#define __is_nonneg(ux) statically_true( \ + (typeof(__builtin_choose_expr(sizeof(ux) > 4, 1LL, 1L)))(ux) >= 0) #endif -#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) -#define __types_ok(x,y,ux,uy) \ - (__sign_use(x,ux) & __sign_use(y,uy)) +#define __types_ok(ux, uy) \ + (__sign_use(ux) & __sign_use(uy)) -#define __types_ok3(x,y,z,ux,uy,uz) \ - (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) +#define __types_ok3(ux, uy, uz) \ + (__sign_use(ux) & __sign_use(uy) & __sign_use(uz)) #define __cmp_op_min < #define __cmp_op_max > @@ -97,30 +90,13 @@ #define __careful_cmp_once(op, x, y, ux, uy) ({ \ __auto_type ux = (x); __auto_type uy = (y); \ - BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + BUILD_BUG_ON_MSG(!__types_ok(ux, uy), \ #op"("#x", "#y") signedness error"); \ __cmp(op, ux, uy); }) #define __careful_cmp(op, x, y) \ __careful_cmp_once(op, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) -#define __clamp(val, lo, hi) \ - ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) - -#define __clamp_once(val, lo, hi, uval, ulo, uhi) ({ \ - __auto_type uval = (val); \ - __auto_type ulo = (lo); \ - __auto_type uhi = (hi); \ - static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ - (lo) <= (hi), true), \ - "clamp() low limit " #lo " greater than high limit " #hi); \ - BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ - "clamp("#val", "#lo", "#hi") signedness error"); \ - __clamp(uval, ulo, uhi); }) - -#define __careful_clamp(val, lo, hi) \ - __clamp_once(val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) - /** * min - return minimum of two values of the same or compatible types * @x: first value @@ -154,7 +130,7 @@ #define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ - BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + BUILD_BUG_ON_MSG(!__types_ok3(ux, uy, uz), \ #op"3("#x", "#y", "#z") signedness error"); \ __cmp(op, ux, __cmp(op, uy, uz)); }) @@ -177,6 +153,22 @@ __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __cmp_once(min, type, x, y) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __cmp_once(max, type, x, y) + +/** * min_not_zero - return the minimum that is _not_ zero, unless both are zero * @x: value1 * @y: value2 @@ -186,39 +178,57 @@ typeof(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) +#define __clamp(val, lo, hi) \ + ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) + +#define __clamp_once(type, val, lo, hi, uval, ulo, uhi) ({ \ + type uval = (val); \ + type ulo = (lo); \ + type uhi = (hi); \ + BUILD_BUG_ON_MSG(statically_true(ulo > uhi), \ + "clamp() low limit " #lo " greater than high limit " #hi); \ + BUILD_BUG_ON_MSG(!__types_ok3(uval, ulo, uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ + __clamp(uval, ulo, uhi); }) + +#define __careful_clamp(type, val, lo, hi) \ + __clamp_once(type, val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) + /** - * clamp - return a value clamped to a given range with strict typechecking + * clamp - return a value clamped to a given range with typechecking * @val: current value * @lo: lowest allowable value * @hi: highest allowable value * - * This macro does strict typechecking of @lo/@hi to make sure they are of the - * same type as @val. See the unnecessary pointer comparisons. - */ -#define clamp(val, lo, hi) __careful_clamp(val, lo, hi) - -/* - * ..and if you can't take the strict - * types, you can specify one yourself. - * - * Or not use min/max/clamp at all, of course. + * This macro checks @val/@lo/@hi to make sure they have compatible + * signedness. */ +#define clamp(val, lo, hi) __careful_clamp(__auto_type, val, lo, hi) /** - * min_t - return minimum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * @type to make all the comparisons. */ -#define min_t(type, x, y) __cmp_once(min, type, x, y) +#define clamp_t(type, val, lo, hi) __careful_clamp(type, val, lo, hi) /** - * max_t - return maximum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed + * integer type. */ -#define max_t(type, x, y) __cmp_once(max, type, x, y) +#define clamp_val(val, lo, hi) __careful_clamp(typeof(val), val, lo, hi) /* * Do not check the array parameter using __must_be_array(). @@ -263,31 +273,6 @@ */ #define max_array(array, len) __minmax_array(max, array, len) -/** - * clamp_t - return a value clamped to a given range using a given type - * @type: the type of variable to use - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of type - * @type to make all the comparisons. - */ -#define clamp_t(type, val, lo, hi) __careful_clamp((type)(val), (type)(lo), (type)(hi)) - -/** - * clamp_val - return a value clamped to a given range using val's type - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of whatever - * type the input argument @val is. This is useful when @val is an unsigned - * type and @lo and @hi are literals that will otherwise be assigned a signed - * integer type. - */ -#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) - static inline bool in_range64(u64 val, u64 start, u64 len) { return (val - start) < len; @@ -326,9 +311,9 @@ static inline bool in_range32(u32 val, u32 start, u32 len) * Use these carefully: no type checking, and uses the arguments * multiple times. Use for obvious constants only. */ -#define MIN(a,b) __cmp(min,a,b) -#define MAX(a,b) __cmp(max,a,b) -#define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) -#define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) +#define MIN(a, b) __cmp(min, a, b) +#define MAX(a, b) __cmp(max, a, b) +#define MIN_T(type, a, b) __cmp(min, (type)(a), (type)(b)) +#define MAX_T(type, a, b) __cmp(max, (type)(a), (type)(b)) #endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 49eef10c8e59..71cf5bfc6349 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -18,6 +18,10 @@ enum misc_res_type { /** @MISC_CG_RES_SEV_ES: AMD SEV-ES ASIDs resource */ MISC_CG_RES_SEV_ES, #endif +#ifdef CONFIG_INTEL_TDX_HOST + /* Intel TDX HKIDs resource */ + MISC_CG_RES_TDX, +#endif /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ MISC_CG_RES_TYPES }; @@ -60,7 +64,6 @@ struct misc_cg { struct misc_res res[MISC_CG_RES_TYPES]; }; -u64 misc_cg_res_total_usage(enum misc_res_type type); int misc_cg_set_capacity(enum misc_res_type type, u64 capacity); int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg, u64 amount); void misc_cg_uncharge(enum misc_res_type type, struct misc_cg *cg, u64 amount); @@ -104,11 +107,6 @@ static inline void put_misc_cg(struct misc_cg *cg) #else /* !CONFIG_CGROUP_MISC */ -static inline u64 misc_cg_res_total_usage(enum misc_res_type type) -{ - return 0; -} - static inline int misc_cg_set_capacity(enum misc_res_type type, u64 capacity) { return 0; diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index c0fea6ca5076..69e110c2b86a 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -76,7 +76,7 @@ struct device; struct attribute_group; -struct miscdevice { +struct miscdevice { int minor; const char *name; const struct file_operations *fops; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 27f42f713c89..f016263e1fcf 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1135,7 +1135,7 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_buf *buf); -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order); +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, unsigned int order); void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, @@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, struct _rule_hw *eth_header); -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index cc647992f3d1..9d2467f982ad 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -280,6 +280,7 @@ enum { MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23, MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE = 1ull << 25, MLX5_MKEY_MASK_FREE = 1ull << 29, + MLX5_MKEY_MASK_PAGE_SIZE_5 = 1ull << 42, MLX5_MKEY_MASK_RELAXED_ORDERING_READ = 1ull << 47, }; @@ -538,6 +539,7 @@ struct mlx5_cmd_layout { }; enum mlx5_rfr_severity_bit_offsets { + MLX5_CRR_BIT_OFFSET = 0x6, MLX5_RFR_BIT_OFFSET = 0x7, }; @@ -1245,10 +1247,12 @@ enum mlx5_cap_type { MLX5_CAP_DEV_EVENT = 0x14, MLX5_CAP_IPSEC, MLX5_CAP_CRYPTO = 0x1a, + MLX5_CAP_SHAMPO = 0x1d, MLX5_CAP_MACSEC = 0x1f, MLX5_CAP_GENERAL_2 = 0x20, MLX5_CAP_PORT_SELECTION = 0x25, MLX5_CAP_ADV_VIRTUALIZATION = 0x26, + MLX5_CAP_ADV_RDMA = 0x28, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1343,6 +1347,12 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \ MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap) +#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(mdev, cap) \ + MLX5_CAP_ADV_RDMA(mdev, rdma_transport_rx_flow_table_properties.cap) + +#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(mdev, cap) \ + MLX5_CAP_ADV_RDMA(mdev, rdma_transport_tx_flow_table_properties.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap) @@ -1382,6 +1392,10 @@ enum mlx5_qcam_feature_groups { MLX5_GET(adv_virtualization_cap, \ mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap) +#define MLX5_CAP_ADV_RDMA(mdev, cap) \ + MLX5_GET(adv_rdma_cap, \ + mdev->caps.hca[MLX5_CAP_ADV_RDMA]->cur, cap) + #define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \ MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap) @@ -1470,6 +1484,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_MACSEC(mdev, cap)\ MLX5_GET(macsec_cap, (mdev)->caps.hca[MLX5_CAP_MACSEC]->cur, cap) +#define MLX5_CAP_SHAMPO(mdev, cap) \ + MLX5_GET(shampo_cap, mdev->caps.hca[MLX5_CAP_SHAMPO]->cur, cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, @@ -1501,6 +1518,7 @@ enum { MLX5_PHYSICAL_LAYER_COUNTERS_GROUP = 0x12, MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP = 0x13, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, + MLX5_PHYSICAL_LAYER_RECOVERY_GROUP = 0x1a, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP = 0x21, }; @@ -1516,8 +1534,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz; } -#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2 -#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1 +#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 6 +#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 4 #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1 diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fed666c5bd16..e6ba8f4f4bd1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -54,7 +54,6 @@ #include <linux/mlx5/doorbell.h> #include <linux/mlx5/eq.h> #include <linux/timecounter.h> -#include <linux/ptp_clock_kernel.h> #include <net/devlink.h> #define MLX5_ADEV_NAME "mlx5_core" @@ -160,9 +159,12 @@ enum { MLX5_REG_MIRC = 0x9162, MLX5_REG_MTPTM = 0x9180, MLX5_REG_MTCTR = 0x9181, + MLX5_REG_MRTCQ = 0x9182, MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, + MLX5_REG_NIC_CAP = 0xC00D, MLX5_REG_DTOR = 0xC00E, + MLX5_REG_VHCA_ICM_CTRL = 0xC010, }; enum mlx5_qpts_trust_state { @@ -302,6 +304,8 @@ struct mlx5_cmd { struct semaphore sem; struct semaphore pages_sem; struct semaphore throttle_sem; + struct semaphore unprivileged_sem; + struct xarray privileged_uids; } vars; enum mlx5_cmdif_state state; void *cmd_alloc_buf; @@ -394,6 +398,7 @@ struct mlx5_core_rsc_common { enum mlx5_res_type res; refcount_t refcount; struct completion free; + bool invalid; }; struct mlx5_uars_page { @@ -676,33 +681,8 @@ struct mlx5_rsvd_gids { struct ida ida; }; -#define MAX_PIN_NUM 8 -struct mlx5_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; - u64 min_npps_period; - u64 min_out_pulse_duration_ns; -}; - -struct mlx5_timer { - struct cyclecounter cycles; - struct timecounter tc; - u32 nominal_c_mult; - unsigned long overflow_period; -}; - -struct mlx5_clock { - struct mlx5_nb pps_nb; - seqlock_t lock; - struct hwtstamp_config hwtstamp_config; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - struct mlx5_pps pps_info; - struct mlx5_timer timer; -}; - +struct mlx5_clock; +struct mlx5_clock_dev_state; struct mlx5_dm; struct mlx5_fw_tracer; struct mlx5_vxlan; @@ -786,7 +766,8 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif - struct mlx5_clock clock; + struct mlx5_clock *clock; + struct mlx5_clock_dev_state *clock_state; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; struct mlx5_rsc_dump *rsc_dump; @@ -986,6 +967,8 @@ struct mlx5_async_work { mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ u16 op_mod; /* cmd op_mod */ + u8 throttle_locked:1; + u8 unpriv_locked:1; void *out; /* pointer to the cmd output buffer */ }; @@ -1016,6 +999,8 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); +int mlx5_cmd_add_privileged_uid(struct mlx5_core_dev *dev, u16 uid); +void mlx5_cmd_remove_privileged_uid(struct mlx5_core_dev *dev, u16 uid); void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *mdev); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index df73a2ccc9af..67256e776566 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -147,6 +147,8 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, /* reuse tun_opts for the mapped ipsec obj id when tun_id is 0 (invalid) */ #define ESW_IPSEC_RX_MAPPED_ID_MASK GENMASK(ESW_TUN_OPTS_BITS - 1, 0) +#define ESW_IPSEC_RX_MAPPED_ID_MATCH_MASK \ + GENMASK(31 - ESW_RESERVED_BITS, ESW_ZONE_ID_BITS) u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 438db888bde0..939e58c2f386 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,6 +40,9 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 0 +#define MLX5_FS_MAX_POOL_SIZE BIT(30) + enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_NONE, MLX5_FLOW_DESTINATION_TYPE_VPORT, @@ -108,6 +111,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC, MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, + MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX, + MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX, }; enum { @@ -163,7 +168,7 @@ struct mlx5_flow_destination { u32 tir_num; u32 ft_num; struct mlx5_flow_table *ft; - u32 counter_id; + struct mlx5_fc *counter; struct { u16 num; u16 vhca_id; @@ -192,9 +197,9 @@ struct mlx5_flow_namespace * mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); struct mlx5_flow_namespace * -mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, - enum mlx5_flow_namespace_type type, - int vport); +mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, + int vport_idx); struct mlx5_flow_table_attr { int prio; @@ -202,6 +207,7 @@ struct mlx5_flow_table_attr { u32 level; u32 flags; u16 uid; + u16 vport; struct mlx5_flow_table *next_ft; struct { @@ -238,6 +244,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); struct mlx5_exe_aso { u32 object_id; + int base_id; u8 type; u8 return_reg_id; union { @@ -299,6 +306,8 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); +struct mlx5_fc *mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size); +void mlx5_fc_local_destroy(struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 48d47181c7cd..2c09df4ee574 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1095,7 +1095,9 @@ struct mlx5_ifc_qos_cap_bits { u8 log_esw_max_sched_depth[0x4]; u8 reserved_at_10[0x10]; - u8 reserved_at_20[0xb]; + u8 reserved_at_20[0x9]; + u8 esw_cross_esw_sched[0x1]; + u8 reserved_at_2a[0x1]; u8 log_max_qos_nic_queue_group[0x5]; u8 reserved_at_30[0x10]; @@ -1103,7 +1105,8 @@ struct mlx5_ifc_qos_cap_bits { u8 packet_pacing_min_rate[0x20]; - u8 reserved_at_80[0x10]; + u8 reserved_at_80[0xb]; + u8 log_esw_max_rate_limit[0x5]; u8 packet_pacing_rate_table_size[0x10]; u8 esw_element_type[0x10]; @@ -1567,6 +1570,8 @@ enum { enum { MLX5_UCTX_CAP_RAW_TX = 1UL << 0, MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1, + MLX5_UCTX_CAP_RDMA_CTRL = 1UL << 3, + MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA = 1UL << 4, }; #define MLX5_FC_BULK_SIZE_FACTOR 128 @@ -1590,12 +1595,14 @@ enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, MLX5_STEERING_FORMAT_CONNECTX_7 = 2, + MLX5_STEERING_FORMAT_CONNECTX_8 = 3, }; struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x6]; u8 page_request_disable[0x1]; - u8 reserved_at_7[0x9]; + u8 abs_native_port_num[0x1]; + u8 reserved_at_8[0x8]; u8 shared_object_to_user_object_allowed[0x1]; u8 reserved_at_13[0xe]; u8 vhca_resource_manager[0x1]; @@ -1825,7 +1832,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 regexp_params[0x1]; u8 uar_sz[0x6]; u8 port_selection_cap[0x1]; - u8 reserved_at_251[0x1]; + u8 nic_cap_reg[0x1]; u8 umem_uid_0[0x1]; u8 reserved_at_253[0x5]; u8 log_pg_sz[0x8]; @@ -1986,7 +1993,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_geneve_tlv_options[0x8]; u8 reserved_at_568[0x3]; u8 max_geneve_tlv_option_data_len[0x5]; - u8 reserved_at_570[0x9]; + u8 reserved_at_570[0x1]; + u8 adv_rdma[0x1]; + u8 reserved_at_572[0x7]; u8 adv_virtualization[0x1]; u8 reserved_at_57a[0x6]; @@ -2135,7 +2144,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 log_min_mkey_entity_size[0x5]; u8 reserved_at_1b0[0x10]; - u8 reserved_at_1c0[0x60]; + u8 general_obj_types_127_64[0x40]; + u8 reserved_at_200[0x20]; u8 reserved_at_220[0x1]; u8 sw_vhca_id_valid[0x1]; @@ -2324,7 +2334,9 @@ struct mlx5_ifc_wq_bits { u8 headers_mkey[0x20]; u8 shampo_enable[0x1]; - u8 reserved_at_1e1[0x4]; + u8 reserved_at_1e1[0x1]; + u8 shampo_mode[0x2]; + u8 reserved_at_1e4[0x1]; u8 log_reservation_size[0x3]; u8 reserved_at_1e8[0x5]; u8 log_max_num_of_packets_per_reservation[0x3]; @@ -2633,6 +2645,12 @@ struct mlx5_ifc_field_select_802_1qau_rp_bits { u8 field_select_8021qaurp[0x20]; }; +struct mlx5_ifc_phys_layer_recovery_cntrs_bits { + u8 total_successful_recovery_events[0x20]; + + u8 reserved_at_20[0x7a0]; +}; + struct mlx5_ifc_phys_layer_cntrs_bits { u8 time_since_last_clear_high[0x20]; @@ -3322,6 +3340,14 @@ struct mlx5_ifc_dropped_packet_logged_bits { u8 reserved_at_0[0xe0]; }; +struct mlx5_ifc_nic_cap_reg_bits { + u8 reserved_at_0[0x1a]; + u8 vhca_icm_ctrl[0x1]; + u8 reserved_at_1b[0x5]; + + u8 reserved_at_20[0x60]; +}; + struct mlx5_ifc_default_timeout_bits { u8 to_multiplier[0x3]; u8 reserved_at_3[0x9]; @@ -3358,6 +3384,18 @@ struct mlx5_ifc_dtor_reg_bits { u8 reserved_at_1c0[0x20]; }; +struct mlx5_ifc_vhca_icm_ctrl_reg_bits { + u8 vhca_id_valid[0x1]; + u8 reserved_at_1[0xf]; + u8 vhca_id[0x10]; + + u8 reserved_at_20[0xa0]; + + u8 cur_alloc_icm[0x20]; + + u8 reserved_at_e0[0x120]; +}; + enum { MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN = 0x1, MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR = 0x2, @@ -3696,6 +3734,22 @@ struct mlx5_ifc_crypto_cap_bits { u8 reserved_at_80[0x780]; }; +struct mlx5_ifc_shampo_cap_bits { + u8 reserved_at_0[0x3]; + u8 shampo_log_max_reservation_size[0x5]; + u8 reserved_at_8[0x3]; + u8 shampo_log_min_reservation_size[0x5]; + u8 shampo_min_mss_size[0x10]; + + u8 shampo_header_split[0x1]; + u8 shampo_header_split_data_merge[0x1]; + u8 reserved_at_22[0x1]; + u8 shampo_log_max_headers_entry_size[0x5]; + u8 reserved_at_28[0x18]; + + u8 reserved_at_40[0x7c0]; +}; + union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap; struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2; @@ -4105,6 +4159,7 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT = 0x5, }; enum { @@ -4113,34 +4168,41 @@ enum { ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, + ELEMENT_TYPE_CAP_MASK_RATE_LIMIT = 1 << 5, }; enum { TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, + TSAR_ELEMENT_TSAR_TYPE_TC_ARB = 0x3, }; enum { TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, TSAR_TYPE_CAP_MASK_ETS = 1 << 2, + TSAR_TYPE_CAP_MASK_TC_ARB = 1 << 3, }; struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; + u8 traffic_class[0x4]; + u8 reserved_at_4[0x4]; u8 tsar_type[0x8]; u8 reserved_at_10[0x10]; }; struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; + u8 reserved_at_0[0x4]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; struct mlx5_ifc_vport_tc_element_bits { u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; @@ -4165,7 +4227,9 @@ struct mlx5_ifc_scheduling_context_bits { u8 max_average_bw[0x20]; - u8 reserved_at_e0[0x120]; + u8 max_bw_obj_id[0x20]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_rqtc_bits { @@ -4788,6 +4852,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_ib_ext_port_cntrs_grp_data_layout_bits ib_ext_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; + struct mlx5_ifc_phys_layer_recovery_cntrs_bits phys_layer_recovery_cntrs; u8 reserved_at_0[0x7c0]; }; @@ -6326,6 +6391,20 @@ struct mlx5_ifc_modify_other_hca_cap_in_bits { struct mlx5_ifc_other_hca_cap_bits other_capability; }; +struct mlx5_ifc_sw_owner_icm_root_params_bits { + u8 sw_owner_icm_root_1[0x40]; + + u8 sw_owner_icm_root_0[0x40]; +}; + +struct mlx5_ifc_rtc_params_bits { + u8 rtc_id_0[0x20]; + + u8 rtc_id_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_flow_table_context_bits { u8 reformat_en[0x1]; u8 decap_en[0x1]; @@ -6344,20 +6423,10 @@ struct mlx5_ifc_flow_table_context_bits { u8 lag_master_next_table_id[0x18]; u8 reserved_at_60[0x60]; - union { - struct { - u8 sw_owner_icm_root_1[0x40]; - u8 sw_owner_icm_root_0[0x40]; - } sws; - struct { - u8 rtc_id_0[0x20]; - - u8 rtc_id_1[0x20]; - - u8 reserved_at_100[0x40]; - - } hws; + union { + struct mlx5_ifc_sw_owner_icm_root_params_bits sws; + struct mlx5_ifc_rtc_params_bits hws; }; }; @@ -7006,6 +7075,7 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits { enum { MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START = 0x1, + MLX5_REFORMAT_CONTEXT_ANCHOR_VLAN_START = 0x2, MLX5_REFORMAT_CONTEXT_ANCHOR_IP_START = 0x7, MLX5_REFORMAT_CONTEXT_ANCHOR_TCP_UDP_START = 0x9, }; @@ -10133,7 +10203,21 @@ struct mlx5_ifc_pplm_reg_bits { u8 fec_override_admin_200g_2x[0x10]; u8 fec_override_admin_100g_1x[0x10]; - u8 reserved_at_260[0x20]; + u8 reserved_at_260[0x60]; + + u8 fec_override_cap_1600g_8x[0x10]; + u8 fec_override_cap_800g_4x[0x10]; + + u8 fec_override_cap_400g_2x[0x10]; + u8 fec_override_cap_200g_1x[0x10]; + + u8 fec_override_admin_1600g_8x[0x10]; + u8 fec_override_admin_800g_4x[0x10]; + + u8 fec_override_admin_400g_2x[0x10]; + u8 fec_override_admin_200g_1x[0x10]; + + u8 reserved_at_340[0x80]; }; struct mlx5_ifc_ppcnt_reg_bits { @@ -10507,7 +10591,11 @@ struct mlx5_ifc_mtutc_reg_bits { }; struct mlx5_ifc_pcam_enhanced_features_bits { - u8 reserved_at_0[0x48]; + u8 reserved_at_0[0x10]; + u8 ppcnt_recovery_counters[0x1]; + u8 reserved_at_11[0xc]; + u8 fec_200G_per_lane_in_pplm[0x1]; + u8 reserved_at_1e[0x2a]; u8 fec_100G_per_lane_in_pplm[0x1]; u8 reserved_at_49[0x1f]; u8 fec_50G_per_lane_in_pplm[0x1]; @@ -10647,7 +10735,8 @@ struct mlx5_ifc_mcam_access_reg_bits3 { u8 regs_63_to_32[0x20]; - u8 regs_31_to_2[0x1e]; + u8 regs_31_to_3[0x1d]; + u8 mrtcq[0x1]; u8 mtctr[0x1]; u8 mtptm[0x1]; }; @@ -11044,6 +11133,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_TRUST_LOCKDOWN_ERR = 0x13, }; struct mlx5_ifc_initial_seg_bits { @@ -12419,12 +12509,17 @@ enum { }; enum { + MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL = BIT_ULL(0x13), +}; + +enum { MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc, MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13, MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20, MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24, MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27, MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47, + MLX5_GENERAL_OBJECT_TYPES_RDMA_CTRL = 0x53, MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS = 0xff15, }; @@ -12992,6 +13087,44 @@ struct mlx5_ifc_load_vhca_state_out_bits { u8 reserved_at_40[0x40]; }; +struct mlx5_ifc_adv_rdma_cap_bits { + u8 rdma_transport_manager[0x1]; + u8 rdma_transport_manager_other_eswitch[0x1]; + u8 reserved_at_2[0x1e]; + + u8 rcx_type[0x8]; + u8 reserved_at_28[0x2]; + u8 ps_entry_log_max_value[0x6]; + u8 reserved_at_30[0x6]; + u8 qp_max_ps_num_entry[0xa]; + + u8 mp_max_num_queues[0x8]; + u8 ps_user_context_max_log_size[0x8]; + u8 message_based_qp_and_striding_wq[0x8]; + u8 reserved_at_58[0x8]; + + u8 max_receive_send_message_size_stride[0x10]; + u8 reserved_at_70[0x10]; + + u8 max_receive_send_message_size_byte[0x20]; + + u8 reserved_at_a0[0x160]; + + struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_rx_flow_table_properties; + + struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_tx_flow_table_properties; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_tx_ft_field_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_bitmask_support_2; + + struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_tx_ft_field_bitmask_support_2; + + u8 reserved_at_800[0x3800]; +}; + struct mlx5_ifc_adv_virtualization_cap_bits { u8 reserved_at_0[0x3]; u8 pg_track_log_max_num[0x5]; @@ -13138,4 +13271,12 @@ struct mlx5_ifc_msees_reg_bits { u8 reserved_at_80[0x180]; }; +struct mlx5_ifc_mrtcq_reg_bits { + u8 reserved_at_0[0x40]; + + u8 rt_clock_identity[0x40]; + + u8 reserved_at_80[0x180]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e68d42b8ce65..58770b86f793 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -61,15 +61,6 @@ enum mlx5_an_status { #define MLX5_EEPROM_PAGE_LENGTH 256 #define MLX5_EEPROM_HIGH_PAGE_LENGTH 128 -struct mlx5_module_eeprom_query_params { - u16 size; - u16 offset; - u16 i2c_address; - u32 page; - u32 bank; - u32 module_number; -}; - enum mlx5e_link_mode { MLX5E_1000BASE_CX_SGMII = 0, MLX5E_1000BASE_KX = 1, @@ -115,9 +106,12 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, + MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14, MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17, MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, + MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20, MLX5E_EXT_LINK_MODES_NUMBER, }; @@ -142,12 +136,6 @@ enum mlx5_ptys_width { MLX5_PTYS_WIDTH_12X = 1 << 4, }; -struct mlx5_port_eth_proto { - u32 cap; - u32 admin; - u32 oper; -}; - #define MLX5E_PROT_MASK(link_mode) (1U << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ @@ -160,14 +148,7 @@ int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, u16 *proto_oper, u8 local_port, u8 plane_index); -void mlx5_toggle_port_link(struct mlx5_core_dev *dev); -int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status status); -int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, - enum mlx5_port_status *status); -int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); - -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); + void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); @@ -175,65 +156,4 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, u8 *vl_hw_cap, u8 local_port); -int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); -int mlx5_query_port_pause(struct mlx5_core_dev *dev, - u32 *rx_pause, u32 *tx_pause); - -int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); -int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, - u8 *pfc_en_rx); - -int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, - u16 stall_critical_watermark, - u16 stall_minor_watermark); -int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, - u16 *stall_critical_watermark, u16 *stall_minor_watermark); - -int mlx5_max_tc(struct mlx5_core_dev *mdev); - -int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); -int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, - u8 prio, u8 *tc); -int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); -int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, - u8 tc, u8 *tc_group); -int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); -int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, - u8 tc, u8 *bw_pct); -int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, - u8 *max_bw_value, - u8 *max_bw_unit); -int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, - u8 *max_bw_value, - u8 *max_bw_unit); -int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); -int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); - -int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen); -int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen); -int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); -void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, - bool *enabled); -int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, - u16 offset, u16 size, u8 *data); -int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, - struct mlx5_module_eeprom_query_params *params, u8 *data); - -int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); -int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); - -int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); -int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); -int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); -int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); - -int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, - struct mlx5_port_eth_proto *eproto); -bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev); -u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy); -u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy); -int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); - #endif /* __MLX5_PORT_H__ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index b1c3db9cf355..fa538feaa8d9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -12,6 +12,7 @@ #include <linux/rbtree.h> #include <linux/atomic.h> #include <linux/debug_locks.h> +#include <linux/compiler.h> #include <linux/mm_types.h> #include <linux/mmap_lock.h> #include <linux/range.h> @@ -32,6 +33,7 @@ #include <linux/memremap.h> #include <linux/slab.h> #include <linux/cacheinfo.h> +#include <linux/rcuwait.h> struct mempolicy; struct anon_vma; @@ -40,22 +42,10 @@ struct user_struct; struct pt_regs; struct folio_batch; -extern int sysctl_page_lock_unfairness; - +void arch_mm_preinit(void); void mm_core_init(void); void init_mm_internals(void); -#ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */ -extern unsigned long max_mapnr; - -static inline void set_max_mapnr(unsigned long limit) -{ - max_mapnr = limit; -} -#else -static inline void set_max_mapnr(unsigned long limit) { } -#endif - extern atomic_long_t _totalram_pages; static inline unsigned long totalram_pages(void) { @@ -78,8 +68,6 @@ static inline void totalram_pages_add(long count) } extern void * high_memory; -extern int page_cluster; -extern const int page_cluster_max; #ifdef CONFIG_SYSCTL extern int sysctl_legacy_va_layout; @@ -209,17 +197,6 @@ extern int sysctl_max_map_count; extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; -extern int sysctl_overcommit_memory; -extern int sysctl_overcommit_ratio; -extern unsigned long sysctl_overcommit_kbytes; - -int overcommit_ratio_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); -int overcommit_kbytes_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); -int overcommit_policy_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); - #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #define folio_page_idx(folio, p) (page_to_pfn(p) - folio_pfn(folio)) @@ -257,8 +234,6 @@ void setup_initial_init_mm(void *start_code, void *end_code, struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); -/* Use only if VMA has no other users */ -void __vm_area_free(struct vm_area_struct *vma); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; @@ -382,9 +357,7 @@ extern unsigned int kobjsize(const void *objp); # define VM_SHADOW_STACK VM_NONE #endif -#if defined(CONFIG_X86) -# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ -#elif defined(CONFIG_PPC64) +#if defined(CONFIG_PPC64) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 @@ -411,7 +384,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 38 +# define VM_UFFD_MINOR_BIT 41 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE @@ -696,109 +669,11 @@ static inline void vma_numab_state_init(struct vm_area_struct *vma) {} static inline void vma_numab_state_free(struct vm_area_struct *vma) {} #endif /* CONFIG_NUMA_BALANCING */ -#ifdef CONFIG_PER_VMA_LOCK /* - * Try to read-lock a vma. The function is allowed to occasionally yield false - * locked result to avoid performance overhead, in which case we fall back to - * using mmap_lock. The function should never yield false unlocked result. + * These must be here rather than mmap_lock.h as dependent on vm_fault type, + * declared in this header. */ -static inline bool vma_start_read(struct vm_area_struct *vma) -{ - /* - * Check before locking. A race might cause false locked result. - * We can use READ_ONCE() for the mm_lock_seq here, and don't need - * ACQUIRE semantics, because this is just a lockless check whose result - * we don't rely on for anything - the mm_lock_seq read against which we - * need ordering is below. - */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) - return false; - - if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) - return false; - - /* - * Overflow might produce false locked result. - * False unlocked result is impossible because we modify and check - * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq - * modification invalidates all existing locks. - * - * We must use ACQUIRE semantics for the mm_lock_seq so that if we are - * racing with vma_end_write_all(), we only start reading from the VMA - * after it has been unlocked. - * This pairs with RELEASE semantics in vma_end_write_all(). - */ - if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) { - up_read(&vma->vm_lock->lock); - return false; - } - return true; -} - -static inline void vma_end_read(struct vm_area_struct *vma) -{ - rcu_read_lock(); /* keeps vma alive till the end of up_read */ - up_read(&vma->vm_lock->lock); - rcu_read_unlock(); -} - -/* WARNING! Can only be used if mmap_lock is expected to be write-locked */ -static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) -{ - mmap_assert_write_locked(vma->vm_mm); - - /* - * current task is holding mmap_write_lock, both vma->vm_lock_seq and - * mm->mm_lock_seq can't be concurrently modified. - */ - *mm_lock_seq = vma->vm_mm->mm_lock_seq; - return (vma->vm_lock_seq == *mm_lock_seq); -} - -/* - * Begin writing to a VMA. - * Exclude concurrent readers under the per-VMA lock until the currently - * write-locked mmap_lock is dropped or downgraded. - */ -static inline void vma_start_write(struct vm_area_struct *vma) -{ - int mm_lock_seq; - - if (__is_vma_write_locked(vma, &mm_lock_seq)) - return; - - down_write(&vma->vm_lock->lock); - /* - * We should use WRITE_ONCE() here because we can have concurrent reads - * from the early lockless pessimistic check in vma_start_read(). - * We don't really care about the correctness of that early check, but - * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. - */ - WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); - up_write(&vma->vm_lock->lock); -} - -static inline void vma_assert_write_locked(struct vm_area_struct *vma) -{ - int mm_lock_seq; - - VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); -} - -static inline void vma_assert_locked(struct vm_area_struct *vma) -{ - if (!rwsem_is_locked(&vma->vm_lock->lock)) - vma_assert_write_locked(vma); -} - -static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) -{ - /* When detaching vma should be write-locked */ - if (detached) - vma_assert_write_locked(vma); - vma->detached = detached; -} - +#ifdef CONFIG_PER_VMA_LOCK static inline void release_fault_lock(struct vm_fault *vmf) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) @@ -814,32 +689,7 @@ static inline void assert_fault_locked(struct vm_fault *vmf) else mmap_assert_locked(vmf->vma->vm_mm); } - -struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, - unsigned long address); - -#else /* CONFIG_PER_VMA_LOCK */ - -static inline bool vma_start_read(struct vm_area_struct *vma) - { return false; } -static inline void vma_end_read(struct vm_area_struct *vma) {} -static inline void vma_start_write(struct vm_area_struct *vma) {} -static inline void vma_assert_write_locked(struct vm_area_struct *vma) - { mmap_assert_write_locked(vma->vm_mm); } -static inline void vma_mark_detached(struct vm_area_struct *vma, - bool detached) {} - -static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, - unsigned long address) -{ - return NULL; -} - -static inline void vma_assert_locked(struct vm_area_struct *vma) -{ - mmap_assert_locked(vma->vm_mm); -} - +#else static inline void release_fault_lock(struct vm_fault *vmf) { mmap_read_unlock(vmf->vma->vm_mm); @@ -849,23 +699,17 @@ static inline void assert_fault_locked(struct vm_fault *vmf) { mmap_assert_locked(vmf->vma->vm_mm); } - #endif /* CONFIG_PER_VMA_LOCK */ extern const struct vm_operations_struct vma_dummy_vm_ops; -/* - * WARNING: vma_init does not initialize vma->vm_lock. - * Use vm_area_alloc()/vm_area_free() if vma needs locking. - */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = mm; vma->vm_ops = &vma_dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); - vma_mark_detached(vma, false); - vma_numab_state_init(vma); + vma_lock_init(vma, false); } /* Use when VMA is not part of the VMA tree and needs no locking */ @@ -1058,6 +902,7 @@ static inline int vma_iter_bulk_store(struct vma_iterator *vmi, if (unlikely(mas_is_err(&vmi->mas))) return -ENOMEM; + vma_mark_attached(vma); return 0; } @@ -1098,6 +943,25 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +extern void prep_compound_page(struct page *page, unsigned int order); + +static inline unsigned int folio_large_order(const struct folio *folio) +{ + return folio->_flags_1 & 0xff; +} + +#ifdef NR_PAGES_IN_LARGE_FOLIO +static inline long folio_large_nr_pages(const struct folio *folio) +{ + return folio->_nr_pages; +} +#else +static inline long folio_large_nr_pages(const struct folio *folio) +{ + return 1L << folio_large_order(folio); +} +#endif + /* * compound_order() can be called without holding a reference, which means * that niceties like page_folio() don't work. These callers should be @@ -1111,7 +975,7 @@ static inline unsigned int compound_order(struct page *page) if (!test_bit(PG_head, &folio->flags)) return 0; - return folio->_flags_1 & 0xff; + return folio_large_order(folio); } /** @@ -1127,7 +991,24 @@ static inline unsigned int folio_order(const struct folio *folio) { if (!folio_test_large(folio)) return 0; - return folio->_flags_1 & 0xff; + return folio_large_order(folio); +} + +/** + * folio_reset_order - Reset the folio order and derived _nr_pages + * @folio: The folio. + * + * Reset the order and derived _nr_pages to 0. Must only be used in the + * process of splitting large folios. + */ +static inline void folio_reset_order(struct folio *folio) +{ + if (WARN_ON_ONCE(!folio_test_large(folio))) + return; + folio->_flags_1 &= ~0xffUL; +#ifdef NR_PAGES_IN_LARGE_FOLIO + folio->_nr_pages = 0; +#endif } #include <linux/huge_mm.h> @@ -1220,6 +1101,8 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline int folio_entire_mapcount(const struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio_large_order(folio) == 1)) + return 0; return atomic_read(&folio->_entire_mapcount) + 1; } @@ -1352,7 +1235,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) return pte; } -vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); +vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *page); void set_pte_range(struct vm_fault *vmf, struct folio *folio, struct page *page, unsigned int nr, unsigned long addr); @@ -1393,9 +1276,9 @@ vm_fault_t finish_fault(struct vm_fault *vmf); * the page's disk buffers. PG_private must be set to tell the VM to call * into the filesystem to release these pages. * - * A page may belong to an inode's memory mapping. In this case, page->mapping - * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_SIZE. + * A folio may belong to an inode's memory mapping. In this case, + * folio->mapping points to the inode, and folio->index is the file + * offset of the folio, in units of PAGE_SIZE. * * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that @@ -1419,25 +1302,6 @@ vm_fault_t finish_fault(struct vm_fault *vmf); * back into memory. */ -#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) -DECLARE_STATIC_KEY_FALSE(devmap_managed_key); - -bool __put_devmap_managed_folio_refs(struct folio *folio, int refs); -static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) -{ - if (!static_branch_unlikely(&devmap_managed_key)) - return false; - if (!folio_is_zone_device(folio)) - return false; - return __put_devmap_managed_folio_refs(folio, refs); -} -#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ -static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs) -{ - return false; -} -#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ - /* 127: arbitrary random number, small enough to assemble well */ #define folio_ref_zero_or_close_to_overflow(folio) \ ((unsigned int) folio_ref_count(folio) + 127u <= 127u) @@ -1458,7 +1322,10 @@ static inline void folio_get(struct folio *folio) static inline void get_page(struct page *page) { - folio_get(page_folio(page)); + struct folio *folio = page_folio(page); + if (WARN_ON_ONCE(folio_test_slab(folio))) + return; + folio_get(folio); } static inline __must_check bool try_get_page(struct page *page) @@ -1552,12 +1419,9 @@ static inline void put_page(struct page *page) { struct folio *folio = page_folio(page); - /* - * For some devmap managed pages we need to catch refcount transition - * from 2 to 1: - */ - if (put_devmap_managed_folio_refs(folio, 1)) + if (folio_test_slab(folio)) return; + folio_put(folio); } @@ -1916,6 +1780,52 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } +#ifdef CONFIG_MMU +static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + return pfn_pte(page_to_pfn(page), pgprot); +} + +/** + * folio_mk_pte - Create a PTE for this folio + * @folio: The folio to create a PTE for + * @pgprot: The page protection bits to use + * + * Create a page table entry for the first page of this folio. + * This is suitable for passing to set_ptes(). + * + * Return: A page table entry suitable for mapping this folio. + */ +static inline pte_t folio_mk_pte(struct folio *folio, pgprot_t pgprot) +{ + return pfn_pte(folio_pfn(folio), pgprot); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/** + * folio_mk_pmd - Create a PMD for this folio + * @folio: The folio to create a PMD for + * @pgprot: The page protection bits to use + * + * Create a page table entry for the first page of this folio. + * This is suitable for passing to set_pmd_at(). + * + * Return: A page table entry suitable for mapping this folio. + */ +static inline pmd_t folio_mk_pmd(struct folio *folio, pgprot_t pgprot) +{ + return pmd_mkhuge(pfn_pmd(folio_pfn(folio), pgprot)); +} +#endif +#endif /* CONFIG_MMU */ + +static inline bool folio_has_pincount(const struct folio *folio) +{ + if (IS_ENABLED(CONFIG_64BIT)) + return folio_test_large(folio); + return folio_order(folio) > 1; +} + /** * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. * @folio: The folio. @@ -1932,7 +1842,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) * get that many refcounts, and b) all the callers of this routine are * expected to be able to deal gracefully with a false positive. * - * For large folios, the result will be exactly correct. That's because + * For most large folios, the result will be exactly correct. That's because * we have more tracking data available: the _pincount field is used * instead of the GUP_PIN_COUNTING_BIAS scheme. * @@ -1943,7 +1853,7 @@ static inline struct folio *pfn_folio(unsigned long pfn) */ static inline bool folio_maybe_dma_pinned(struct folio *folio) { - if (folio_test_large(folio)) + if (folio_has_pincount(folio)) return atomic_read(&folio->_pincount) > 0; /* @@ -2015,6 +1925,13 @@ static inline bool folio_is_longterm_pinnable(struct folio *folio) if (folio_is_device_coherent(folio)) return false; + /* + * Filesystems can only tolerate transient delays to truncate and + * hole-punch operations + */ + if (folio_is_fsdax(folio)) + return false; + /* Otherwise, non-movable zone folios can be pinned. */ return !folio_is_zone_movable(folio); @@ -2058,11 +1975,7 @@ static inline long folio_nr_pages(const struct folio *folio) { if (!folio_test_large(folio)) return 1; -#ifdef CONFIG_64BIT - return folio->_folio_nr_pages; -#else - return 1L << (folio->_flags_1 & 0xff); -#endif + return folio_large_nr_pages(folio); } /* Only hugetlbfs can allocate folios larger than MAX_ORDER */ @@ -2077,26 +1990,13 @@ static inline long folio_nr_pages(const struct folio *folio) * page. compound_nr() can be called on a tail page, and is defined to * return 1 in that case. */ -static inline unsigned long compound_nr(struct page *page) +static inline long compound_nr(struct page *page) { struct folio *folio = (struct folio *)page; if (!test_bit(PG_head, &folio->flags)) return 1; -#ifdef CONFIG_64BIT - return folio->_folio_nr_pages; -#else - return 1L << (folio->_flags_1 & 0xff); -#endif -} - -/** - * thp_nr_pages - The number of regular pages in this huge page. - * @page: The head page of a huge page. - */ -static inline int thp_nr_pages(struct page *page) -{ - return folio_nr_pages((struct folio *)page); + return folio_large_nr_pages(folio); } /** @@ -2149,23 +2049,18 @@ static inline size_t folio_size(const struct folio *folio) } /** - * folio_likely_mapped_shared - Estimate if the folio is mapped into the page - * tables of more than one MM + * folio_maybe_mapped_shared - Whether the folio is mapped into the page + * tables of more than one MM * @folio: The folio. * - * This function checks if the folio is currently mapped into more than one - * MM ("mapped shared"), or if the folio is only mapped into a single MM - * ("mapped exclusively"). + * This function checks if the folio maybe currently mapped into more than one + * MM ("maybe mapped shared"), or if the folio is certainly mapped into a single + * MM ("mapped exclusively"). * * For KSM folios, this function also returns "mapped shared" when a folio is * mapped multiple times into the same MM, because the individual page mappings * are independent. * - * As precise information is not easily available for all folios, this function - * estimates the number of MMs ("sharers") that are currently mapping a folio - * using the number of times the first page of the folio is currently mapped - * into page tables. - * * For small anonymous folios and anonymous hugetlb folios, the return * value will be exactly correct: non-KSM folios can only be mapped at most once * into an MM, and they cannot be partially mapped. KSM folios are @@ -2173,8 +2068,8 @@ static inline size_t folio_size(const struct folio *folio) * * For other folios, the result can be fuzzy: * #. For partially-mappable large folios (THP), the return value can wrongly - * indicate "mapped exclusively" (false negative) when the folio is - * only partially mapped into at least one MM. + * indicate "mapped shared" (false positive) if a folio was mapped by + * more than two MMs at one point in time. * #. For pagecache folios (including hugetlb), the return value can wrongly * indicate "mapped shared" (false positive) when two VMAs in the same MM * cover the same file range. @@ -2191,7 +2086,7 @@ static inline size_t folio_size(const struct folio *folio) * * Return: Whether the folio is estimated to be mapped into more than one MM. */ -static inline bool folio_likely_mapped_shared(struct folio *folio) +static inline bool folio_maybe_mapped_shared(struct folio *folio) { int mapcount = folio_mapcount(folio); @@ -2199,16 +2094,77 @@ static inline bool folio_likely_mapped_shared(struct folio *folio) if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio))) return mapcount > 1; - /* A single mapping implies "mapped exclusively". */ + /* + * vm_insert_page() without CONFIG_TRANSPARENT_HUGEPAGE ... + * simply assume "mapped shared", nobody should really care + * about this for arbitrary kernel allocations. + */ + if (!IS_ENABLED(CONFIG_MM_ID)) + return true; + + /* + * A single mapping implies "mapped exclusively", even if the + * folio flag says something different: it's easier to handle this + * case here instead of on the RMAP hot path. + */ if (mapcount <= 1) return false; + return test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids); +} - /* If any page is mapped more than once we treat it "mapped shared". */ - if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio)) - return true; +/** + * folio_expected_ref_count - calculate the expected folio refcount + * @folio: the folio + * + * Calculate the expected folio refcount, taking references from the pagecache, + * swapcache, PG_private and page table mappings into account. Useful in + * combination with folio_ref_count() to detect unexpected references (e.g., + * GUP or other temporary references). + * + * Does currently not consider references from the LRU cache. If the folio + * was isolated from the LRU (which is the case during migration or split), + * the LRU cache does not apply. + * + * Calling this function on an unmapped folio -- !folio_mapped() -- that is + * locked will return a stable result. + * + * Calling this function on a mapped folio will not result in a stable result, + * because nothing stops additional page table mappings from coming (e.g., + * fork()) or going (e.g., munmap()). + * + * Calling this function without the folio lock will also not result in a + * stable result: for example, the folio might get dropped from the swapcache + * concurrently. + * + * However, even when called without the folio lock or on a mapped folio, + * this function can be used to detect unexpected references early (for example, + * if it makes sense to even lock the folio and unmap it). + * + * The caller must add any reference (e.g., from folio_try_get()) it might be + * holding itself to the result. + * + * Returns the expected folio refcount. + */ +static inline int folio_expected_ref_count(const struct folio *folio) +{ + const int order = folio_order(folio); + int ref_count = 0; + + if (WARN_ON_ONCE(folio_test_slab(folio))) + return 0; + + if (folio_test_anon(folio)) { + /* One reference per page from the swapcache. */ + ref_count += folio_test_swapcache(folio) << order; + } else if (!((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS)) { + /* One reference per page from the pagecache. */ + ref_count += !!folio->mapping << order; + /* One reference from PG_private. */ + ref_count += folio_test_private(folio); + } - /* Let's guess based on the first subpage. */ - return atomic_read(&folio->_mapcount) > 0; + /* One reference per page table mapping. */ + return ref_count + folio_mapcount(folio); } #ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE @@ -2311,7 +2267,6 @@ static inline void clear_page_pfmemalloc(struct page *page) extern void pagefault_out_of_memory(void); #define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) -#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1)) #define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1)) /* @@ -2320,6 +2275,7 @@ extern void pagefault_out_of_memory(void); struct zap_details { struct folio *single_folio; /* Locked folio to be unmapped */ bool even_cows; /* Zap COWed private pages too? */ + bool reclaim_pt; /* Need reclaim page tables? */ zap_flags_t zap_flags; /* Extra flags for zapping */ }; @@ -2416,11 +2372,13 @@ struct follow_pfnmap_args { * Outputs: * * @pfn: the PFN of the address + * @addr_mask: address mask covering pfn * @pgprot: the pgprot_t of the mapping * @writable: whether the mapping is writable * @special: whether the mapping is a special mapping (real PFN maps) */ unsigned long pfn; + unsigned long addr_mask; pgprot_t pgprot; bool writable; bool special; @@ -2485,6 +2443,11 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); +#ifdef CONFIG_BPF_SYSCALL +extern int copy_remote_vm_str(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); +#endif + long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, @@ -2548,7 +2511,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, struct task_struct *task, bool bypass_rlim); struct kvec; -struct page *get_dump_page(unsigned long addr); +struct page *get_dump_page(unsigned long addr, int *locked); bool folio_mark_dirty(struct folio *folio); bool folio_mark_dirty_lock(struct folio *folio); @@ -2605,6 +2568,11 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return percpu_counter_read_positive(&mm->rss_stat[member]); } +static inline unsigned long get_mm_counter_sum(struct mm_struct *mm, int member) +{ + return percpu_counter_sum_positive(&mm->rss_stat[member]); +} + void mm_trace_rss_stat(struct mm_struct *mm, int member); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) @@ -2664,7 +2632,7 @@ static inline void update_hiwater_rss(struct mm_struct *mm) { unsigned long _rss = get_mm_rss(mm); - if ((mm)->hiwater_rss < _rss) + if (data_race(mm->hiwater_rss) < _rss) (mm)->hiwater_rss = _rss; } @@ -2991,18 +2959,15 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} #endif /* defined(CONFIG_SPLIT_PTE_PTLOCKS) */ -static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc) +static inline void __pagetable_ctor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); - if (!ptlock_init(ptdesc)) - return false; __folio_set_pgtable(folio); lruvec_stat_add_folio(folio, NR_PAGETABLE); - return true; } -static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) +static inline void pagetable_dtor(struct ptdesc *ptdesc) { struct folio *folio = ptdesc_folio(ptdesc); @@ -3011,6 +2976,21 @@ static inline void pagetable_pte_dtor(struct ptdesc *ptdesc) lruvec_stat_sub_folio(folio, NR_PAGETABLE); } +static inline void pagetable_dtor_free(struct ptdesc *ptdesc) +{ + pagetable_dtor(ptdesc); + pagetable_free(ptdesc); +} + +static inline bool pagetable_pte_ctor(struct mm_struct *mm, + struct ptdesc *ptdesc) +{ + if (mm != &init_mm && !ptlock_init(ptdesc)) + return false; + __pagetable_ctor(ptdesc); + return true; +} + pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); static inline pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) @@ -3087,14 +3067,6 @@ static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) return ptlock_init(ptdesc); } -static inline void pmd_ptlock_free(struct ptdesc *ptdesc) -{ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON_PAGE(ptdesc->pmd_huge_pte, ptdesc_page(ptdesc)); -#endif - ptlock_free(ptdesc); -} - #define pmd_huge_pte(mm, pmd) (pmd_ptdesc(pmd)->pmd_huge_pte) #else @@ -3105,7 +3077,6 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) } static inline bool pmd_ptlock_init(struct ptdesc *ptdesc) { return true; } -static inline void pmd_ptlock_free(struct ptdesc *ptdesc) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -3118,27 +3089,16 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } -static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc) +static inline bool pagetable_pmd_ctor(struct mm_struct *mm, + struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); - - if (!pmd_ptlock_init(ptdesc)) + if (mm != &init_mm && !pmd_ptlock_init(ptdesc)) return false; - __folio_set_pgtable(folio); ptdesc_pmd_pts_init(ptdesc); - lruvec_stat_add_folio(folio, NR_PAGETABLE); + __pagetable_ctor(ptdesc); return true; } -static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc) -{ - struct folio *folio = ptdesc_folio(ptdesc); - - pmd_ptlock_free(ptdesc); - __folio_clear_pgtable(folio); - lruvec_stat_sub_folio(folio, NR_PAGETABLE); -} - /* * No scalability reason to split PUD locks yet, but follow the same pattern * as the PMD locks to make it easier if we decide to. The VM should not be @@ -3160,18 +3120,17 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) static inline void pagetable_pud_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); - - __folio_set_pgtable(folio); - lruvec_stat_add_folio(folio, NR_PAGETABLE); + __pagetable_ctor(ptdesc); } -static inline void pagetable_pud_dtor(struct ptdesc *ptdesc) +static inline void pagetable_p4d_ctor(struct ptdesc *ptdesc) { - struct folio *folio = ptdesc_folio(ptdesc); + __pagetable_ctor(ptdesc); +} - __folio_clear_pgtable(folio); - lruvec_stat_sub_folio(folio, NR_PAGETABLE); +static inline void pagetable_pgd_ctor(struct ptdesc *ptdesc) +{ + __pagetable_ctor(ptdesc); } extern void __init pagecache_init(void); @@ -3193,7 +3152,6 @@ extern void reserve_bootmem_region(phys_addr_t start, /* Free the reserved page into the buddy system, so it gets managed. */ void free_reserved_page(struct page *page); -#define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) { @@ -3323,7 +3281,8 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void exit_mmap(struct mm_struct *); -int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift); +bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, bool write); static inline int check_data_rlimit(unsigned long rlim, unsigned long new, @@ -3371,9 +3330,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, return __get_unmapped_area(file, addr, len, pgoff, flags, 0); } -extern unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, - struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, @@ -3437,9 +3393,6 @@ extern unsigned long stack_guard_gap; int expand_stack_locked(struct vm_area_struct *vma, unsigned long address); struct vm_area_struct *expand_stack(struct mm_struct * mm, unsigned long addr); -/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ -int expand_downwards(struct vm_area_struct *vma, unsigned long address); - /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, @@ -3557,6 +3510,8 @@ int vm_map_pages(struct vm_area_struct *vma, struct page **pages, unsigned long num); int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages, unsigned long num); +vm_fault_t vmf_insert_page_mkwrite(struct vm_fault *vmf, struct page *page, + bool write); vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn); vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, @@ -3817,12 +3772,6 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); -#ifdef CONFIG_SYSCTL -extern int sysctl_drop_caches; -int drop_caches_sysctl_handler(const struct ctl_table *, int, void *, size_t *, - loff_t *); -#endif - void drop_slab(void); #ifndef CONFIG_MMU @@ -3841,6 +3790,7 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) #endif void *sparse_buffer_alloc(unsigned long size); +unsigned long section_map_size(void); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap, struct dev_pagemap *pgmap); @@ -3849,7 +3799,8 @@ p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, - struct vmem_altmap *altmap, struct page *reuse); + struct vmem_altmap *altmap, unsigned long ptpfn, + unsigned long flags); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; void *vmemmap_alloc_block_buf(unsigned long size, int node, @@ -3865,6 +3816,12 @@ int vmemmap_populate_hugepages(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); +int vmemmap_populate_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); +int vmemmap_undo_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); +void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node, + unsigned long headsize); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end, @@ -3931,9 +3888,6 @@ static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, } #endif -void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, - unsigned long nr_pages); - enum mf_flags { MF_COUNT_INCREASED = 1 << 0, MF_ACTION_REQUIRED = 1 << 1, @@ -4094,69 +4048,6 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, pgoff_t first_index, pgoff_t nr); #endif -extern int sysctl_nr_trim_pages; - -#ifdef CONFIG_PRINTK -void mem_dump_obj(void *object); -#else -static inline void mem_dump_obj(void *object) {} -#endif - -static inline bool is_write_sealed(int seals) -{ - return seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE); -} - -/** - * is_readonly_sealed - Checks whether write-sealed but mapped read-only, - * in which case writes should be disallowing moving - * forwards. - * @seals: the seals to check - * @vm_flags: the VMA flags to check - * - * Returns whether readonly sealed, in which case writess should be disallowed - * going forward. - */ -static inline bool is_readonly_sealed(int seals, vm_flags_t vm_flags) -{ - /* - * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as - * MAP_SHARED and read-only, take care to not allow mprotect to - * revert protections on such mappings. Do this only for shared - * mappings. For private mappings, don't need to mask - * VM_MAYWRITE as we still want them to be COW-writable. - */ - if (is_write_sealed(seals) && - ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_SHARED)) - return true; - - return false; -} - -/** - * seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and - * handle them. - * @seals: the seals to check - * @vma: the vma to operate on - * - * Check whether F_SEAL_WRITE or F_SEAL_FUTURE_WRITE are set; if so, do proper - * check/handling on the vma flags. Return 0 if check pass, or <0 for errors. - */ -static inline int seal_check_write(int seals, struct vm_area_struct *vma) -{ - if (!is_write_sealed(seals)) - return 0; - - /* - * New PROT_WRITE and MAP_SHARED mmaps are not allowed when - * write seals are active. - */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) - return -EPERM; - - return 0; -} - #ifdef CONFIG_ANON_VMA_NAME int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, unsigned long len_in, @@ -4197,6 +4088,7 @@ void vma_pgtable_walk_begin(struct vm_area_struct *vma); void vma_pgtable_walk_end(struct vm_area_struct *vma); int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); +int reserve_mem_release_by_name(const char *name); #ifdef CONFIG_64BIT int do_mseal(unsigned long start, size_t len_in, unsigned long flags); @@ -4229,4 +4121,72 @@ int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *st int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status); int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status); + +/* + * mseal of userspace process's system mappings. + */ +#ifdef CONFIG_MSEAL_SYSTEM_MAPPINGS +#define VM_SEALED_SYSMAP VM_SEALED +#else +#define VM_SEALED_SYSMAP VM_NONE +#endif + +/* + * DMA mapping IDs for page_pool + * + * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and + * stashes it in the upper bits of page->pp_magic. We always want to be able to + * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP + * pages can have arbitrary kernel pointers stored in the same field as pp_magic + * (since it overlaps with page->lru.next), so we must ensure that we cannot + * mistake a valid kernel pointer with any of the values we write into this + * field. + * + * On architectures that set POISON_POINTER_DELTA, this is already ensured, + * since this value becomes part of PP_SIGNATURE; meaning we can just use the + * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the + * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is + * 0, we make sure that we leave the two topmost bits empty, as that guarantees + * we won't mistake a valid kernel pointer for a value we set, regardless of the + * VMSPLIT setting. + * + * Altogether, this means that the number of bits available is constrained by + * the size of an unsigned long (at the upper end, subtracting two bits per the + * above), and the definition of PP_SIGNATURE (with or without + * POISON_POINTER_DELTA). + */ +#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA)) +#if POISON_POINTER_DELTA > 0 +/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA + * index to not overlap with that if set + */ +#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT) +#else +/* Always leave out the topmost two; see above. */ +#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2) +#endif + +#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \ + PP_DMA_INDEX_SHIFT) + +/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is + * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for + * the head page of compound page and bit 1 for pfmemalloc page, as well as the + * bits used for the DMA index. page_is_pfmemalloc() is checked in + * __page_pool_put_page() to avoid recycling the pfmemalloc page. + */ +#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL) + +#ifdef CONFIG_PAGE_POOL +static inline bool page_pool_page_is_pp(struct page *page) +{ + return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE; +} +#else +static inline bool page_pool_page_is_pp(struct page *page) +{ + return false; +} +#endif + #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 1b6a917fffa4..89b518ff097e 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -133,31 +133,25 @@ static inline int lru_hist_from_seq(unsigned long seq) return seq % NR_HIST_GENS; } -static inline int lru_tier_from_refs(int refs) +static inline int lru_tier_from_refs(int refs, bool workingset) { VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH)); - /* see the comment in folio_lru_refs() */ - return order_base_2(refs + 1); + /* see the comment on MAX_NR_TIERS */ + return workingset ? MAX_NR_TIERS - 1 : order_base_2(refs); } static inline int folio_lru_refs(struct folio *folio) { unsigned long flags = READ_ONCE(folio->flags); - bool workingset = flags & BIT(PG_workingset); + if (!(flags & BIT(PG_referenced))) + return 0; /* - * Return the number of accesses beyond PG_referenced, i.e., N-1 if the - * total number of accesses is N>1, since N=0,1 both map to the first - * tier. lru_tier_from_refs() will account for this off-by-one. Also see - * the comment on MAX_NR_TIERS. + * Return the total number of accesses including PG_referenced. Also see + * the comment on LRU_REFS_FLAGS. */ - return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; -} - -static inline void folio_clear_lru_refs(struct folio *folio) -{ - set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); + return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1; } static inline int folio_lru_gen(struct folio *folio) @@ -223,11 +217,43 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); } +static inline unsigned long lru_gen_folio_seq(struct lruvec *lruvec, struct folio *folio, + bool reclaiming) +{ + int gen; + int type = folio_is_file_lru(folio); + struct lru_gen_folio *lrugen = &lruvec->lrugen; + + /* + * +-----------------------------------+-----------------------------------+ + * | Accessed through page tables and | Accessed through file descriptors | + * | promoted by folio_update_gen() | and protected by folio_inc_gen() | + * +-----------------------------------+-----------------------------------+ + * | PG_active (set while isolated) | | + * +-----------------+-----------------+-----------------+-----------------+ + * | PG_workingset | PG_referenced | PG_workingset | LRU_REFS_FLAGS | + * +-----------------------------------+-----------------------------------+ + * |<---------- MIN_NR_GENS ---------->| | + * |<---------------------------- MAX_NR_GENS ---------------------------->| + */ + if (folio_test_active(folio)) + gen = MIN_NR_GENS - folio_test_workingset(folio); + else if (reclaiming) + gen = MAX_NR_GENS; + else if ((!folio_is_file_lru(folio) && !folio_test_swapcache(folio)) || + (folio_test_reclaim(folio) && + (folio_test_dirty(folio) || folio_test_writeback(folio)))) + gen = MIN_NR_GENS; + else + gen = MAX_NR_GENS - folio_test_workingset(folio); + + return max(READ_ONCE(lrugen->max_seq) - gen + 1, READ_ONCE(lrugen->min_seq[type])); +} + static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { unsigned long seq; unsigned long flags; - unsigned long mask; int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); @@ -237,40 +263,12 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, if (folio_test_unevictable(folio) || !lrugen->enabled) return false; - /* - * There are four common cases for this page: - * 1. If it's hot, i.e., freshly faulted in, add it to the youngest - * generation, and it's protected over the rest below. - * 2. If it can't be evicted immediately, i.e., a dirty page pending - * writeback, add it to the second youngest generation. - * 3. If it should be evicted first, e.g., cold and clean from - * folio_rotate_reclaimable(), add it to the oldest generation. - * 4. Everything else falls between 2 & 3 above and is added to the - * second oldest generation if it's considered inactive, or the - * oldest generation otherwise. See lru_gen_is_active(). - */ - if (folio_test_active(folio)) - seq = lrugen->max_seq; - else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || - (folio_test_reclaim(folio) && - (folio_test_dirty(folio) || folio_test_writeback(folio)))) - seq = lrugen->max_seq - 1; - else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) - seq = lrugen->min_seq[type]; - else - seq = lrugen->min_seq[type] + 1; + seq = lru_gen_folio_seq(lruvec, folio, reclaiming); gen = lru_gen_from_seq(seq); flags = (gen + 1UL) << LRU_GEN_PGOFF; /* see the comment on MIN_NR_GENS about PG_active */ - mask = LRU_GEN_MASK; - /* - * Don't clear PG_workingset here because it can affect PSI accounting - * if the activation is due to workingset refault. - */ - if (folio_test_active(folio)) - mask |= LRU_REFS_MASK | BIT(PG_referenced) | BIT(PG_active); - set_mask_bits(&folio->flags, mask, flags); + set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ @@ -449,6 +447,8 @@ static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, #endif /* CONFIG_ANON_VMA_NAME */ +void pfnmap_track_ctx_release(struct kref *ref); + static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); @@ -564,9 +564,9 @@ static inline pte_marker copy_pte_marker( * Must be called with pgtable lock held so that no thread will see the none * pte, and if they see it, they'll fault and serialize at the pgtable lock. * - * This function is a no-op if PTE_MARKER_UFFD_WP is not enabled. + * Returns true if an uffd-wp pte was installed, false otherwise. */ -static inline void +static inline bool pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t pteval) { @@ -583,7 +583,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, * with a swap pte. There's no way of leaking the bit. */ if (vma_is_anonymous(vma) || !userfaultfd_wp(vma)) - return; + return false; /* A uffd-wp wr-protected normal pte */ if (unlikely(pte_present(pteval) && pte_uffd_wp(pteval))) @@ -596,10 +596,13 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, if (unlikely(pte_swp_uffd_wp_any(pteval))) arm_uffd_pte = true; - if (unlikely(arm_uffd_pte)) + if (unlikely(arm_uffd_pte)) { set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); + return true; + } #endif + return false; } static inline bool vma_has_recency(struct vm_area_struct *vma) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 14fc1b39c0cf..d6b91e8a66d6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -19,6 +19,7 @@ #include <linux/workqueue.h> #include <linux/seqlock.h> #include <linux/percpu_counter.h> +#include <linux/types.h> #include <asm/mmu.h> @@ -27,9 +28,9 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) -#define INIT_PASID 0 struct address_space; +struct futex_private_hash; struct mem_cgroup; /* @@ -99,11 +100,15 @@ struct page { /* Or, free page */ struct list_head buddy_list; struct list_head pcp_list; + struct { + struct llist_node pcp_llist; + unsigned int order; + }; }; /* See page-flags.h for PAGE_MAPPING_FLAGS */ struct address_space *mapping; union { - pgoff_t index; /* Our offset within mapping. */ + pgoff_t __folio_index; /* Our offset within mapping. */ unsigned long share; /* share count for fsdax */ }; /** @@ -129,8 +134,11 @@ struct page { unsigned long compound_head; /* Bit zero is set */ }; struct { /* ZONE_DEVICE pages */ - /** @pgmap: Points to the hosting device page map. */ - struct dev_pagemap *pgmap; + /* + * The first word is used for compound_head or folio + * pgmap + */ + void *_unused_pgmap_compound_head; void *zone_device_data; /* * ZONE_DEVICE private pages are counted as being @@ -283,6 +291,49 @@ typedef struct { unsigned long val; } swp_entry_t; +#if defined(CONFIG_MEMCG) || defined(CONFIG_SLAB_OBJ_EXT) +/* We have some extra room after the refcount in tail pages. */ +#define NR_PAGES_IN_LARGE_FOLIO +#endif + +/* + * On 32bit, we can cut the required metadata in half, because: + * (a) PID_MAX_LIMIT implicitly limits the number of MMs we could ever have, + * so we can limit MM IDs to 15 bit (32767). + * (b) We don't expect folios where even a single complete PTE mapping by + * one MM would exceed 15 bits (order-15). + */ +#ifdef CONFIG_64BIT +typedef int mm_id_mapcount_t; +#define MM_ID_MAPCOUNT_MAX INT_MAX +typedef unsigned int mm_id_t; +#else /* !CONFIG_64BIT */ +typedef short mm_id_mapcount_t; +#define MM_ID_MAPCOUNT_MAX SHRT_MAX +typedef unsigned short mm_id_t; +#endif /* CONFIG_64BIT */ + +/* We implicitly use the dummy ID for init-mm etc. where we never rmap pages. */ +#define MM_ID_DUMMY 0 +#define MM_ID_MIN (MM_ID_DUMMY + 1) + +/* + * We leave the highest bit of each MM id unused, so we can store a flag + * in the highest bit of each folio->_mm_id[]. + */ +#define MM_ID_BITS ((sizeof(mm_id_t) * BITS_PER_BYTE) - 1) +#define MM_ID_MASK ((1U << MM_ID_BITS) - 1) +#define MM_ID_MAX MM_ID_MASK + +/* + * In order to use bit_spin_lock(), which requires an unsigned long, we + * operate on folio->_mm_ids when working on flags. + */ +#define FOLIO_MM_IDS_LOCK_BITNUM MM_ID_BITS +#define FOLIO_MM_IDS_LOCK_BIT BIT(FOLIO_MM_IDS_LOCK_BITNUM) +#define FOLIO_MM_IDS_SHARED_BITNUM (2 * MM_ID_BITS + 1) +#define FOLIO_MM_IDS_SHARED_BIT BIT(FOLIO_MM_IDS_SHARED_BITNUM) + /** * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. @@ -292,6 +343,8 @@ typedef struct { * anonymous memory. * @index: Offset within the file, in units of pages. For anonymous memory, * this is the index from the beginning of the mmap. + * @share: number of DAX mappings that reference this folio. See + * dax_associate_entry. * @private: Filesystem per-folio data (see folio_attach_private()). * @swap: Used for swp_entry_t if folio_test_swapcache(). * @_mapcount: Do not access this member directly. Use folio_mapcount() to @@ -299,13 +352,17 @@ typedef struct { * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. + * @pgmap: Metadata for ZONE_DEVICE mappings * @virtual: Virtual address in the kernel direct map. * @_last_cpupid: IDs of last CPU and last process that accessed the folio. * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_large_mapcount: Do not use directly, call folio_mapcount(). * @_nr_pages_mapped: Do not use outside of rmap and debug code. * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). - * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). + * @_nr_pages: Do not use directly, call folio_nr_pages(). + * @_mm_id: Do not use outside of rmap code. + * @_mm_ids: Do not use outside of rmap code. + * @_mm_id_mapcount: Do not use outside of rmap code. * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. @@ -337,9 +394,13 @@ struct folio { /* private: */ }; /* public: */ + struct dev_pagemap *pgmap; }; struct address_space *mapping; - pgoff_t index; + union { + pgoff_t index; + unsigned long share; + }; union { void *private; swp_entry_t swap; @@ -365,14 +426,30 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + union { + struct { /* public: */ - atomic_t _large_mapcount; - atomic_t _entire_mapcount; - atomic_t _nr_pages_mapped; - atomic_t _pincount; + atomic_t _large_mapcount; + atomic_t _nr_pages_mapped; #ifdef CONFIG_64BIT - unsigned int _folio_nr_pages; -#endif + atomic_t _entire_mapcount; + atomic_t _pincount; +#endif /* CONFIG_64BIT */ + mm_id_mapcount_t _mm_id_mapcount[2]; + union { + mm_id_t _mm_id[2]; + unsigned long _mm_ids; + }; + /* private: the union with struct page is transitional */ + }; + unsigned long _usable_1[4]; + }; + atomic_t _mapcount_1; + atomic_t _refcount_1; + /* public: */ +#ifdef NR_PAGES_IN_LARGE_FOLIO + unsigned int _nr_pages; +#endif /* NR_PAGES_IN_LARGE_FOLIO */ /* private: the union with struct page is transitional */ }; struct page __page_1; @@ -382,20 +459,27 @@ struct folio { unsigned long _flags_2; unsigned long _head_2; /* public: */ - void *_hugetlb_subpool; - void *_hugetlb_cgroup; - void *_hugetlb_cgroup_rsvd; - void *_hugetlb_hwpoison; + struct list_head _deferred_list; +#ifndef CONFIG_64BIT + atomic_t _entire_mapcount; + atomic_t _pincount; +#endif /* !CONFIG_64BIT */ /* private: the union with struct page is transitional */ }; + struct page __page_2; + }; + union { struct { - unsigned long _flags_2a; - unsigned long _head_2a; + unsigned long _flags_3; + unsigned long _head_3; /* public: */ - struct list_head _deferred_list; + void *_hugetlb_subpool; + void *_hugetlb_cgroup; + void *_hugetlb_cgroup_rsvd; + void *_hugetlb_hwpoison; /* private: the union with struct page is transitional */ }; - struct page __page_2; + struct page __page_3; }; }; @@ -405,7 +489,7 @@ FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); -FOLIO_MATCH(index, index); +FOLIO_MATCH(__folio_index, index); FOLIO_MATCH(private, private); FOLIO_MATCH(_mapcount, _mapcount); FOLIO_MATCH(_refcount, _refcount); @@ -424,21 +508,29 @@ FOLIO_MATCH(_last_cpupid, _last_cpupid); offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); +FOLIO_MATCH(_mapcount, _mapcount_1); +FOLIO_MATCH(_refcount, _refcount_1); #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); -FOLIO_MATCH(flags, _flags_2a); -FOLIO_MATCH(compound_head, _head_2a); +#undef FOLIO_MATCH +#define FOLIO_MATCH(pg, fl) \ + static_assert(offsetof(struct folio, fl) == \ + offsetof(struct page, pg) + 3 * sizeof(struct page)) +FOLIO_MATCH(flags, _flags_3); +FOLIO_MATCH(compound_head, _head_3); #undef FOLIO_MATCH /** * struct ptdesc - Memory descriptor for page tables. * @__page_flags: Same as page flags. Powerpc only. * @pt_rcu_head: For freeing page table pages. - * @pt_list: List of used page tables. Used for s390 and x86. + * @pt_list: List of used page tables. Used for s390 gmap shadow pages + * (which are not linked into the user page tables) and x86 + * pgds. * @_pt_pad_1: Padding that aliases with page's compound head. * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. * @__page_mapping: Aliases with page->mapping. Unused for page tables. @@ -498,7 +590,7 @@ TABLE_MATCH(flags, __page_flags); TABLE_MATCH(compound_head, pt_list); TABLE_MATCH(compound_head, _pt_pad_1); TABLE_MATCH(mapping, __page_mapping); -TABLE_MATCH(index, pt_index); +TABLE_MATCH(__folio_index, pt_index); TABLE_MATCH(rcu_head, pt_rcu_head); TABLE_MATCH(page_type, __page_type); TABLE_MATCH(_refcount, __page_refcount); @@ -572,6 +664,12 @@ static inline void *folio_get_private(struct folio *folio) typedef unsigned long vm_flags_t; /* + * freeptr_t represents a SLUB freelist pointer, which might be encoded + * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. + */ +typedef struct { unsigned long v; } freeptr_t; + +/* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that * map parts of them. @@ -627,9 +725,8 @@ static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) } #endif -struct vma_lock { - struct rw_semaphore lock; -}; +#define VMA_LOCK_OFFSET 0x40000000 +#define VMA_REF_LIMIT (VMA_LOCK_OFFSET - 1) struct vma_numab_state { /* @@ -667,6 +764,38 @@ struct vma_numab_state { int prev_scan_seq; }; +#ifdef __HAVE_PFNMAP_TRACKING +struct pfnmap_track_ctx { + struct kref kref; + unsigned long pfn; + unsigned long size; /* in bytes */ +}; +#endif + +/* + * Describes a VMA that is about to be mmap()'ed. Drivers may choose to + * manipulate mutable fields which will cause those fields to be updated in the + * resultant VMA. + * + * Helper functions are not required for manipulating any field. + */ +struct vm_area_desc { + /* Immutable state. */ + struct mm_struct *mm; + unsigned long start; + unsigned long end; + + /* Mutable fields. Populated with initial state. */ + pgoff_t pgoff; + struct file *file; + vm_flags_t vm_flags; + pgprot_t page_prot; + + /* Write-only fields. */ + const struct vm_operations_struct *vm_ops; + void *private_data; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -675,6 +804,9 @@ struct vma_numab_state { * * Only explicitly marked struct members may be accessed by RCU readers before * getting a stable reference. + * + * WARNING: when adding new members, please update vm_area_init_from() to copy + * them during vm_area_struct content duplication. */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */ @@ -685,9 +817,7 @@ struct vm_area_struct { unsigned long vm_start; unsigned long vm_end; }; -#ifdef CONFIG_PER_VMA_LOCK - struct rcu_head vm_rcu; /* Used for deferred freeing. */ -#endif + freeptr_t vm_freeptr; /* Pointer used by SLAB_TYPESAFE_BY_RCU */ }; /* @@ -708,18 +838,12 @@ struct vm_area_struct { #ifdef CONFIG_PER_VMA_LOCK /* - * Flag to indicate areas detached from the mm->mm_mt tree. - * Unstable RCU readers are allowed to read this. - */ - bool detached; - - /* * Can only be written (using WRITE_ONCE()) while holding both: * - mmap_lock (in write mode) - * - vm_lock->lock (in write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set * Can be read reliably while holding one of: * - mmap_lock (in read or write mode) - * - vm_lock->lock (in read or write mode) + * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1 * Can be read unreliably (using READ_ONCE()) for pessimistic bailout * while holding nothing (except RCU to keep the VMA struct allocated). * @@ -727,21 +851,8 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; - /* Unstable RCU readers are allowed to read this. */ - struct vma_lock *vm_lock; + unsigned int vm_lock_seq; #endif - - /* - * For areas with an address space and backing store, - * linkage into the address_space->i_mmap interval tree. - * - */ - struct { - struct rb_node rb; - unsigned long rb_subtree_last; - } shared; - /* * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * list, after a COW of one of the file pages. A MAP_SHARED vma @@ -761,14 +872,6 @@ struct vm_area_struct { struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ -#ifdef CONFIG_ANON_VMA_NAME - /* - * For private and shared anonymous mappings, a pointer to a null - * terminated string containing the name given to the vma, or NULL if - * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. - */ - struct anon_vma_name *anon_name; -#endif #ifdef CONFIG_SWAP atomic_long_t swap_readahead_info; #endif @@ -781,7 +884,34 @@ struct vm_area_struct { #ifdef CONFIG_NUMA_BALANCING struct vma_numab_state *numab_state; /* NUMA Balancing state */ #endif +#ifdef CONFIG_PER_VMA_LOCK + /* Unstable RCU readers are allowed to read this. */ + refcount_t vm_refcnt ____cacheline_aligned_in_smp; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map vmlock_dep_map; +#endif +#endif + /* + * For areas with an address space and backing store, + * linkage into the address_space->i_mmap interval tree. + * + */ + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +#ifdef __HAVE_PFNMAP_TRACKING + struct pfnmap_track_ctx *pfnmap_track_ctx; +#endif } __randomize_layout; #ifdef CONFIG_NUMA @@ -916,12 +1046,16 @@ struct mm_struct { * by mmlist_lock */ #ifdef CONFIG_PER_VMA_LOCK + struct rcuwait vma_writer_wait; /* * This field has lock-like semantics, meaning it is sometimes * accessed with ACQUIRE/RELEASE semantics. * Roughly speaking, incrementing the sequence number is * equivalent to releasing locks on VMAs; reading the sequence * number can be part of taking a read lock on a VMA. + * Incremented every time mmap_lock is write-locked/unlocked. + * Initialized to 0, therefore odd values indicate mmap_lock + * is write-locked and even values that it's released. * * Can be modified under write mmap_lock using RELEASE * semantics. @@ -930,9 +1064,13 @@ struct mm_struct { * Can be read with ACQUIRE semantics if not holding write * mmap_lock. */ - int mm_lock_seq; + seqcount_t mm_lock_seq; +#endif +#ifdef CONFIG_FUTEX_PRIVATE_HASH + struct mutex futex_hash_lock; + struct futex_private_hash __rcu *futex_phash; + struct futex_private_hash *futex_phash_new; #endif - unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ @@ -1065,6 +1203,9 @@ struct mm_struct { #endif } lru_gen; #endif /* CONFIG_LRU_GEN_WALKS_MMU */ +#ifdef CONFIG_MM_ID + mm_id_t mm_id; +#endif /* CONFIG_MM_ID */ } __randomize_layout; /* @@ -1402,6 +1543,7 @@ enum tlb_flush_reason { TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, + TLB_REMOTE_WRONG_CPU, NR_TLB_FLUSH_REASONS, }; diff --git a/include/linux/mman.h b/include/linux/mman.h index a842783ffa62..f4c6346a8fcd 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -59,8 +59,6 @@ | MAP_HUGE_1GB) extern int sysctl_overcommit_memory; -extern int sysctl_overcommit_ratio; -extern unsigned long sysctl_overcommit_kbytes; extern struct percpu_counter vm_committed_as; #ifdef CONFIG_SMP @@ -157,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +#endif arch_calc_vm_flag_bits(file, flags); } diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index de9dc20b01ba..ae9f89672574 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -1,12 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMAP_LOCK_H #define _LINUX_MMAP_LOCK_H +/* Avoid a dependency loop by declaring here. */ +extern int rcuwait_wake_up(struct rcuwait *w); + #include <linux/lockdep.h> #include <linux/mm_types.h> #include <linux/mmdebug.h> #include <linux/rwsem.h> #include <linux/tracepoint-defs.h> #include <linux/types.h> +#include <linux/cleanup.h> +#include <linux/sched/mm.h> #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -71,39 +77,314 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) } #ifdef CONFIG_PER_VMA_LOCK + +static inline void mm_lock_seqcount_init(struct mm_struct *mm) +{ + seqcount_init(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) +{ + do_raw_write_seqcount_begin(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_end(struct mm_struct *mm) +{ + ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq); + do_raw_write_seqcount_end(&mm->mm_lock_seq); +} + +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) +{ + /* + * Since mmap_lock is a sleeping lock, and waiting for it to become + * unlocked is more or less equivalent with taking it ourselves, don't + * bother with the speculative path if mmap_lock is already write-locked + * and take the slow path, which takes the lock. + */ + return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq); +} + +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return read_seqcount_retry(&mm->mm_lock_seq, seq); +} + +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + static struct lock_class_key lockdep_key; + + lockdep_init_map(&vma->vmlock_dep_map, "vm_lock", &lockdep_key, 0); +#endif + if (reset_refcnt) + refcount_set(&vma->vm_refcnt, 0); + vma->vm_lock_seq = UINT_MAX; +} + +static inline bool is_vma_writer_only(int refcnt) +{ + /* + * With a writer and no readers, refcnt is VMA_LOCK_OFFSET if the vma + * is detached and (VMA_LOCK_OFFSET + 1) if it is attached. Waiting on + * a detached vma happens only in vma_mark_detached() and is a rare + * case, therefore most of the time there will be no unnecessary wakeup. + */ + return refcnt & VMA_LOCK_OFFSET && refcnt <= VMA_LOCK_OFFSET + 1; +} + +static inline void vma_refcount_put(struct vm_area_struct *vma) +{ + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *mm = vma->vm_mm; + int oldcnt; + + rwsem_release(&vma->vmlock_dep_map, _RET_IP_); + if (!__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt)) { + + if (is_vma_writer_only(oldcnt - 1)) + rcuwait_wake_up(&mm->vma_writer_wait); + } +} + /* - * Drop all currently-held per-VMA locks. - * This is called from the mmap_lock implementation directly before releasing - * a write-locked mmap_lock (or downgrading it to read-locked). - * This should normally NOT be called manually from other places. - * If you want to call this manually anyway, keep in mind that this will release - * *all* VMA write locks, including ones from further up the stack. + * Try to read-lock a vma. The function is allowed to occasionally yield false + * locked result to avoid performance overhead, in which case we fall back to + * using mmap_lock. The function should never yield false unlocked result. + * False locked result is possible if mm_lock_seq overflows or if vma gets + * reused and attached to a different mm before we lock it. + * Returns the vma on success, NULL on failure to lock and EAGAIN if vma got + * detached. + * + * WARNING! The vma passed to this function cannot be used if the function + * fails to lock it because in certain cases RCU lock is dropped and then + * reacquired. Once RCU lock is dropped the vma can be concurently freed. */ -static inline void vma_end_write_all(struct mm_struct *mm) +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) { - mmap_assert_write_locked(mm); + int oldcnt; + + /* + * Check before locking. A race might cause false locked result. + * We can use READ_ONCE() for the mm_lock_seq here, and don't need + * ACQUIRE semantics, because this is just a lockless check whose result + * we don't rely on for anything - the mm_lock_seq read against which we + * need ordering is below. + */ + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(mm->mm_lock_seq.sequence)) + return NULL; + + /* + * If VMA_LOCK_OFFSET is set, __refcount_inc_not_zero_limited_acquire() + * will fail because VMA_REF_LIMIT is less than VMA_LOCK_OFFSET. + * Acquire fence is required here to avoid reordering against later + * vm_lock_seq check and checks inside lock_vma_under_rcu(). + */ + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) { + /* return EAGAIN if vma got detached from under us */ + return oldcnt ? NULL : ERR_PTR(-EAGAIN); + } + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + + /* + * If vma got attached to another mm from under us, that mm is not + * stable and can be freed in the narrow window after vma->vm_refcnt + * is dropped and before rcuwait_wake_up(mm) is called. Grab it before + * releasing vma->vm_refcnt. + */ + if (unlikely(vma->vm_mm != mm)) { + /* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt */ + struct mm_struct *other_mm = vma->vm_mm; + + /* + * __mmdrop() is a heavy operation and we don't need RCU + * protection here. Release RCU lock during these operations. + * We reinstate the RCU read lock as the caller expects it to + * be held when this function returns even on error. + */ + rcu_read_unlock(); + mmgrab(other_mm); + vma_refcount_put(vma); + mmdrop(other_mm); + rcu_read_lock(); + return NULL; + } + /* - * Nobody can concurrently modify mm->mm_lock_seq due to exclusive - * mmap_lock being held. - * We need RELEASE semantics here to ensure that preceding stores into - * the VMA take effect before we unlock it with this store. - * Pairs with ACQUIRE semantics in vma_start_read(). + * Overflow of vm_lock_seq/mm_lock_seq might produce false locked result. + * False unlocked result is impossible because we modify and check + * vma->vm_lock_seq under vma->vm_refcnt protection and mm->mm_lock_seq + * modification invalidates all existing locks. + * + * We must use ACQUIRE semantics for the mm_lock_seq so that if we are + * racing with vma_end_write_all(), we only start reading from the VMA + * after it has been unlocked. + * This pairs with RELEASE semantics in vma_end_write_all(). */ - smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1); + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&mm->mm_lock_seq))) { + vma_refcount_put(vma); + return NULL; + } + + return vma; +} + +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. + * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass) +{ + int oldcnt; + + mmap_assert_locked(vma->vm_mm); + if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt, + VMA_REF_LIMIT))) + return false; + + rwsem_acquire_read(&vma->vmlock_dep_map, 0, 1, _RET_IP_); + return true; +} + +/* + * Use only while holding mmap read lock which guarantees that locking will not + * fail (nobody can concurrently write-lock the vma). vma_start_read() should + * not be used in such cases because it might fail due to mm_lock_seq overflow. + * This functionality is used to obtain vma read lock and drop the mmap read lock. + */ +static inline bool vma_start_read_locked(struct vm_area_struct *vma) +{ + return vma_start_read_locked_nested(vma, 0); } -#else -static inline void vma_end_write_all(struct mm_struct *mm) {} -#endif -static inline void mmap_init_lock(struct mm_struct *mm) +static inline void vma_end_read(struct vm_area_struct *vma) { - init_rwsem(&mm->mmap_lock); + vma_refcount_put(vma); } +/* WARNING! Can only be used if mmap_lock is expected to be write-locked */ +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) +{ + mmap_assert_write_locked(vma->vm_mm); + + /* + * current task is holding mmap_write_lock, both vma->vm_lock_seq and + * mm->mm_lock_seq can't be concurrently modified. + */ + *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; + return (vma->vm_lock_seq == *mm_lock_seq); +} + +void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq); + +/* + * Begin writing to a VMA. + * Exclude concurrent readers under the per-VMA lock until the currently + * write-locked mmap_lock is dropped or downgraded. + */ +static inline void vma_start_write(struct vm_area_struct *vma) +{ + unsigned int mm_lock_seq; + + if (__is_vma_write_locked(vma, &mm_lock_seq)) + return; + + __vma_start_write(vma, mm_lock_seq); +} + +static inline void vma_assert_write_locked(struct vm_area_struct *vma) +{ + unsigned int mm_lock_seq; + + VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); +} + +static inline void vma_assert_locked(struct vm_area_struct *vma) +{ + unsigned int mm_lock_seq; + + VM_BUG_ON_VMA(refcount_read(&vma->vm_refcnt) <= 1 && + !__is_vma_write_locked(vma, &mm_lock_seq), vma); +} + +/* + * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these + * assertions should be made either under mmap_write_lock or when the object + * has been isolated under mmap_write_lock, ensuring no competing writers. + */ +static inline void vma_assert_attached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt)); +} + +static inline void vma_assert_detached(struct vm_area_struct *vma) +{ + WARN_ON_ONCE(refcount_read(&vma->vm_refcnt)); +} + +static inline void vma_mark_attached(struct vm_area_struct *vma) +{ + vma_assert_write_locked(vma); + vma_assert_detached(vma); + refcount_set_release(&vma->vm_refcnt, 1); +} + +void vma_mark_detached(struct vm_area_struct *vma); + +struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address); + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_end(struct mm_struct *mm) {} + +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) +{ + return false; +} + +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return true; +} +static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {} +static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm, + struct vm_area_struct *vma) + { return NULL; } +static inline void vma_end_read(struct vm_area_struct *vma) {} +static inline void vma_start_write(struct vm_area_struct *vma) {} +static inline void vma_assert_write_locked(struct vm_area_struct *vma) + { mmap_assert_write_locked(vma->vm_mm); } +static inline void vma_assert_attached(struct vm_area_struct *vma) {} +static inline void vma_assert_detached(struct vm_area_struct *vma) {} +static inline void vma_mark_attached(struct vm_area_struct *vma) {} +static inline void vma_mark_detached(struct vm_area_struct *vma) {} + +static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address) +{ + return NULL; +} + +static inline void vma_assert_locked(struct vm_area_struct *vma) +{ + mmap_assert_locked(vma->vm_mm); +} + +#endif /* CONFIG_PER_VMA_LOCK */ + static inline void mmap_write_lock(struct mm_struct *mm) { __mmap_lock_trace_start_locking(mm, true); down_write(&mm->mmap_lock); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -111,6 +392,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) { __mmap_lock_trace_start_locking(mm, true); down_write_nested(&mm->mmap_lock, subclass); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -120,10 +402,26 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm) __mmap_lock_trace_start_locking(mm, true); ret = down_write_killable(&mm->mmap_lock); + if (!ret) + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, ret == 0); return ret; } +/* + * Drop all currently-held per-VMA locks. + * This is called from the mmap_lock implementation directly before releasing + * a write-locked mmap_lock (or downgrading it to read-locked). + * This should normally NOT be called manually from other places. + * If you want to call this manually anyway, keep in mind that this will release + * *all* VMA write locks, including ones from further up the stack. + */ +static inline void vma_end_write_all(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + mm_lock_seqcount_end(mm); +} + static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); @@ -171,6 +469,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm) up_read(&mm->mmap_lock); } +DEFINE_GUARD(mmap_read_lock, struct mm_struct *, + mmap_read_lock(_T), mmap_read_unlock(_T)) + static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 526fce581657..ddcdf23d731c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -329,6 +329,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ +#define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 56972bd78462..01e0f591a20b 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -57,6 +57,7 @@ struct mmc_command { #define MMC_RSP_NONE (0) #define MMC_RSP_R1 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) #define MMC_RSP_R1B (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE|MMC_RSP_BUSY) +#define MMC_RSP_R1B_NO_CRC (MMC_RSP_PRESENT|MMC_RSP_OPCODE|MMC_RSP_BUSY) #define MMC_RSP_R2 (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC) #define MMC_RSP_R3 (MMC_RSP_PRESENT) #define MMC_RSP_R4 (MMC_RSP_PRESENT) @@ -64,9 +65,6 @@ struct mmc_command { #define MMC_RSP_R6 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) #define MMC_RSP_R7 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) -/* Can be used by core to poll after switch to MMC HS mode */ -#define MMC_RSP_R1_NO_CRC (MMC_RSP_PRESENT|MMC_RSP_OPCODE) - #define mmc_resp_type(cmd) ((cmd)->flags & (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC|MMC_RSP_BUSY|MMC_RSP_OPCODE)) /* diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f166d6611ddb..68f09a955a90 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -590,6 +590,14 @@ static inline struct mmc_host *mmc_from_priv(void *priv) return container_of(priv, struct mmc_host, private); } +#ifdef CONFIG_MMC_CRYPTO +static inline struct mmc_host * +mmc_from_crypto_profile(struct blk_crypto_profile *profile) +{ + return container_of(profile, struct mmc_host, crypto_profile); +} +#endif + #define mmc_host_is_spi(host) ((host)->caps & MMC_CAP_SPI) #define mmc_dev(x) ((x)->parent) diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 274a2767ea49..23ac5696fa38 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -22,10 +22,9 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, unsigned int idx, unsigned int debounce); int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config); -void mmc_gpio_set_cd_isr(struct mmc_host *host, irq_handler_t isr); int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); void mmc_gpiod_request_cd_irq(struct mmc_host *host); -bool mmc_can_gpio_cd(struct mmc_host *host); -bool mmc_can_gpio_ro(struct mmc_host *host); +bool mmc_host_can_gpio_cd(struct mmc_host *host); +bool mmc_host_can_gpio_ro(struct mmc_host *host); #endif diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index d7cb1e5ecbda..a0a3894900ed 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -9,10 +9,12 @@ struct page; struct vm_area_struct; struct mm_struct; struct vma_iterator; +struct vma_merge_struct; void dump_page(const struct page *page, const char *reason); void dump_vma(const struct vm_area_struct *vma); void dump_mm(const struct mm_struct *mm); +void dump_vmg(const struct vma_merge_struct *vmg, const char *reason); void vma_iter_dump_tree(const struct vma_iterator *vmi); #ifdef CONFIG_DEBUG_VM @@ -87,6 +89,15 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); } \ unlikely(__ret_warn_once); \ }) +#define VM_WARN_ON_VMG(cond, vmg) ({ \ + int __ret_warn = !!(cond); \ + \ + if (unlikely(__ret_warn)) { \ + dump_vmg(vmg, "VM_WARN_ON_VMG(" __stringify(cond)")"); \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn); \ +}) #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) @@ -104,9 +115,10 @@ void vma_iter_dump_tree(const struct vma_iterator *vmi); #define VM_WARN_ON_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_FOLIO(cond, folio) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE_MM(cond, mm) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_VMG(cond, vmg) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) -#endif +#endif /* CONFIG_DEBUG_VM */ #ifdef CONFIG_DEBUG_VM_IRQSOFF #define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled()) diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index bbaec80c78c5..ac01dc4eb2ce 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -24,6 +24,7 @@ static inline void leave_mm(void) { } #ifndef task_cpu_possible_mask # define task_cpu_possible_mask(p) cpu_possible_mask # define task_cpu_possible(cpu, p) true +# define task_cpu_fallback_mask(p) housekeeping_cpumask(HK_TYPE_TICK) #else # define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p)) #endif diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index e2dd57ca368b..d1094c2d5fb6 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -43,10 +43,10 @@ struct mmu_interval_notifier; * a device driver to possibly ignore the invalidation if the * owner field matches the driver's device private pgmap owner. * - * @MMU_NOTIFY_EXCLUSIVE: to signal a device driver that the device will no - * longer have exclusive access to the page. When sent during creation of an - * exclusive range the owner will be initialised to the value provided by the - * caller of make_device_exclusive_range(), otherwise the owner will be NULL. + * @MMU_NOTIFY_EXCLUSIVE: conversion of a page table entry to device-exclusive. + * The owner is initialized to the value provided by the caller of + * make_device_exclusive(), such that this caller can filter out these + * events. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, @@ -654,9 +654,6 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_young_notify ptep_test_and_clear_young #define pmdp_clear_young_notify pmdp_test_and_clear_young -#define ptep_clear_flush_notify ptep_clear_flush -#define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush -#define pudp_huge_clear_flush_notify pudp_huge_clear_flush static inline void mmu_notifier_synchronize(void) { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b36124145a16..283913d42d7b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,6 +37,22 @@ #define NR_PAGE_ORDERS (MAX_PAGE_ORDER + 1) +/* Defines the order for the number of pages that have a migrate type. */ +#ifndef CONFIG_PAGE_BLOCK_ORDER +#define PAGE_BLOCK_ORDER MAX_PAGE_ORDER +#else +#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER +#endif /* CONFIG_PAGE_BLOCK_ORDER */ + +/* + * The MAX_PAGE_ORDER, which defines the max order of pages to be allocated + * by the buddy allocator, has to be larger or equal to the PAGE_BLOCK_ORDER, + * which defines the order for the number of pages that can have a migrate type + */ +#if (PAGE_BLOCK_ORDER > MAX_PAGE_ORDER) +#error MAX_PAGE_ORDER must be >= PAGE_BLOCK_ORDER +#endif + /* * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed * costly to service. That is between allocation orders which should @@ -138,6 +154,7 @@ enum numa_stat_item { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, + NR_FREE_PAGES_BLOCKS, NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, NR_ZONE_ACTIVE_ANON, @@ -147,7 +164,6 @@ enum zone_stat_item { NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ - NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif @@ -220,9 +236,11 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, + PGDEMOTE_PROACTIVE, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif + NR_BALLOON_PAGES, NR_VM_NODE_STAT_ITEMS }; @@ -332,66 +350,88 @@ enum lruvec_flags { #endif /* !__GENERATING_BOUNDS_H */ /* - * Evictable pages are divided into multiple generations. The youngest and the + * Evictable folios are divided into multiple generations. The youngest and the * oldest generation numbers, max_seq and min_seq, are monotonically increasing. * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while - * a page is on one of lrugen->folios[]. Otherwise it stores 0. + * a folio is on one of lrugen->folios[]. Otherwise it stores 0. * - * A page is added to the youngest generation on faulting. The aging needs to - * check the accessed bit at least twice before handing this page over to the - * eviction. The first check takes care of the accessed bit set on the initial - * fault; the second check makes sure this page hasn't been used since then. - * This process, AKA second chance, requires a minimum of two generations, - * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive - * LRU, e.g., /proc/vmstat, these two generations are considered active; the - * rest of generations, if they exist, are considered inactive. See - * lru_gen_is_active(). + * After a folio is faulted in, the aging needs to check the accessed bit at + * least twice before handing this folio over to the eviction. The first check + * clears the accessed bit from the initial fault; the second check makes sure + * this folio hasn't been used since then. This process, AKA second chance, + * requires a minimum of two generations, hence MIN_NR_GENS. And to maintain ABI + * compatibility with the active/inactive LRU, e.g., /proc/vmstat, these two + * generations are considered active; the rest of generations, if they exist, + * are considered inactive. See lru_gen_is_active(). * - * PG_active is always cleared while a page is on one of lrugen->folios[] so - * that the aging needs not to worry about it. And it's set again when a page - * considered active is isolated for non-reclaiming purposes, e.g., migration. - * See lru_gen_add_folio() and lru_gen_del_folio(). + * PG_active is always cleared while a folio is on one of lrugen->folios[] so + * that the sliding window needs not to worry about it. And it's set again when + * a folio considered active is isolated for non-reclaiming purposes, e.g., + * migration. See lru_gen_add_folio() and lru_gen_del_folio(). * * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits - * in folio->flags. + * in folio->flags, masked by LRU_GEN_MASK. */ #define MIN_NR_GENS 2U #define MAX_NR_GENS 4U /* - * Each generation is divided into multiple tiers. A page accessed N times - * through file descriptors is in tier order_base_2(N). A page in the first tier - * (N=0,1) is marked by PG_referenced unless it was faulted in through page - * tables or read ahead. A page in any other tier (N>1) is marked by - * PG_referenced and PG_workingset. This implies a minimum of two tiers is - * supported without using additional bits in folio->flags. + * Each generation is divided into multiple tiers. A folio accessed N times + * through file descriptors is in tier order_base_2(N). A folio in the first + * tier (N=0,1) is marked by PG_referenced unless it was faulted in through page + * tables or read ahead. A folio in the last tier (MAX_NR_TIERS-1) is marked by + * PG_workingset. A folio in any other tier (1<N<5) between the first and last + * is marked by additional bits of LRU_REFS_WIDTH in folio->flags. * * In contrast to moving across generations which requires the LRU lock, moving * across tiers only involves atomic operations on folio->flags and therefore * has a negligible cost in the buffered access path. In the eviction path, - * comparisons of refaulted/(evicted+protected) from the first tier and the - * rest infer whether pages accessed multiple times through file descriptors - * are statistically hot and thus worth protecting. + * comparisons of refaulted/(evicted+protected) from the first tier and the rest + * infer whether folios accessed multiple times through file descriptors are + * statistically hot and thus worth protecting. * * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the * number of categories of the active/inactive LRU when keeping track of * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in - * folio->flags. + * folio->flags, masked by LRU_REFS_MASK. */ #define MAX_NR_TIERS 4U #ifndef __GENERATING_BOUNDS_H -struct lruvec; -struct page_vma_mapped_walk; - #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) +/* + * For folios accessed multiple times through file descriptors, + * lru_gen_inc_refs() sets additional bits of LRU_REFS_WIDTH in folio->flags + * after PG_referenced, then PG_workingset after LRU_REFS_WIDTH. After all its + * bits are set, i.e., LRU_REFS_FLAGS|BIT(PG_workingset), a folio is lazily + * promoted into the second oldest generation in the eviction path. And when + * folio_inc_gen() does that, it clears LRU_REFS_FLAGS so that + * lru_gen_inc_refs() can start over. Note that for this case, LRU_REFS_MASK is + * only valid when PG_referenced is set. + * + * For folios accessed multiple times through page tables, folio_update_gen() + * from a page table walk or lru_gen_set_refs() from a rmap walk sets + * PG_referenced after the accessed bit is cleared for the first time. + * Thereafter, those two paths set PG_workingset and promote folios to the + * youngest generation. Like folio_inc_gen(), folio_update_gen() also clears + * PG_referenced. Note that for this case, LRU_REFS_MASK is not used. + * + * For both cases above, after PG_workingset is set on a folio, it remains until + * this folio is either reclaimed, or "deactivated" by lru_gen_clear_refs(). It + * can be set again if lru_gen_test_recent() returns true upon a refault. + */ +#define LRU_REFS_FLAGS (LRU_REFS_MASK | BIT(PG_referenced)) + +struct lruvec; +struct page_vma_mapped_walk; + #ifdef CONFIG_LRU_GEN enum { @@ -406,8 +446,6 @@ enum { NR_LRU_GEN_CAPS }; -#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) - #define MIN_LRU_BATCH BITS_PER_LONG #define MAX_LRU_BATCH (MIN_LRU_BATCH * 64) @@ -421,12 +459,11 @@ enum { /* * The youngest generation number is stored in max_seq for both anon and file * types as they are aged on an equal footing. The oldest generation numbers are - * stored in min_seq[] separately for anon and file types as clean file pages - * can be evicted regardless of swap constraints. - * - * Normally anon and file min_seq are in sync. But if swapping is constrained, - * e.g., out of swap space, file min_seq is allowed to advance and leave anon - * min_seq behind. + * stored in min_seq[] separately for anon and file types so that they can be + * incremented independently. Ideally min_seq[] are kept in sync when both anon + * and file types are evictable. However, to adapt to situations like extreme + * swappiness, they are allowed to be out of sync by at most + * MAX_NR_GENS-MIN_NR_GENS-1. * * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. @@ -446,8 +483,8 @@ struct lru_gen_folio { unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; /* the exponential moving average of evicted+protected */ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; - /* the first tier doesn't need protection, hence the minus one */ - unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; + /* can only be modified under the LRU lock */ + unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* can be modified without holding the LRU lock */ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; @@ -498,7 +535,7 @@ struct lru_gen_mm_walk { int mm_stats[NR_MM_STATS]; /* total batched items */ int batched; - bool can_swap; + int swappiness; bool force_scan; }; @@ -945,6 +982,9 @@ struct zone { #ifdef CONFIG_UNACCEPTED_MEMORY /* Pages to be accepted. All pages on the list are MAX_PAGE_ORDER */ struct list_head unaccepted_pages; + + /* To be called once the last page in the zone is accepted */ + struct work_struct unaccepted_cleanup; #endif /* zone flags, see below */ @@ -953,6 +993,9 @@ struct zone { /* Primarily protects free_area */ spinlock_t lock; + /* Pages to be freed when next trylock succeeds */ + struct llist_head trylock_free_pages; + /* Write-intensive fields used by compaction and vmstats. */ CACHELINE_PADDING(_pad2_); @@ -1139,6 +1182,12 @@ static inline bool is_zone_device_page(const struct page *page) return page_zonenum(page) == ZONE_DEVICE; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + VM_WARN_ON_ONCE_PAGE(!is_zone_device_page(page), page); + return page_folio(page)->pgmap; +} + /* * Consecutive zone device pages should not be merged into the same sgl * or bvec segment with other types of pages or if they belong to different @@ -1154,7 +1203,7 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, return false; if (!is_zone_device_page(a)) return true; - return a->pgmap == b->pgmap; + return page_pgmap(a) == page_pgmap(b); } extern void memmap_init_zone_device(struct zone *, unsigned long, @@ -1169,6 +1218,10 @@ static inline bool zone_device_pages_have_same_pgmap(const struct page *a, { return true; } +static inline struct dev_pagemap *page_pgmap(const struct page *page) +{ + return NULL; +} #endif static inline bool folio_is_zone_device(const struct folio *folio) @@ -1464,8 +1517,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx, unsigned int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int highest_zoneidx); /* * Memory initialization context, use to differentiate memory added by * the platform statically or via memory hotplug interface. @@ -1915,6 +1966,9 @@ enum { #ifdef CONFIG_ZONE_DEVICE SECTION_TAINT_ZONE_DEVICE_BIT, #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT + SECTION_IS_VMEMMAP_PREINIT_BIT, +#endif SECTION_MAP_LAST_BIT, }; @@ -1925,6 +1979,9 @@ enum { #ifdef CONFIG_ZONE_DEVICE #define SECTION_TAINT_ZONE_DEVICE BIT(SECTION_TAINT_ZONE_DEVICE_BIT) #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +#define SECTION_IS_VMEMMAP_PREINIT BIT(SECTION_IS_VMEMMAP_PREINIT_BIT) +#endif #define SECTION_MAP_MASK (~(BIT(SECTION_MAP_LAST_BIT) - 1)) #define SECTION_NID_SHIFT SECTION_MAP_LAST_BIT @@ -1979,6 +2036,30 @@ static inline int online_device_section(struct mem_section *section) } #endif +#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return (section && + (section->section_mem_map & SECTION_IS_VMEMMAP_PREINIT)); +} + +void sparse_vmemmap_init_nid_early(int nid); +void sparse_vmemmap_init_nid_late(int nid); + +#else +static inline int preinited_vmemmap_section(struct mem_section *section) +{ + return 0; +} +static inline void sparse_vmemmap_init_nid_early(int nid) +{ +} + +static inline void sparse_vmemmap_init_nid_late(int nid) +{ +} +#endif + static inline int online_section_nr(unsigned long nr) { return online_section(__nr_to_section(nr)); @@ -2009,13 +2090,42 @@ static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) return usage ? test_bit(idx, usage->subsection_map) : 0; } + +static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn) +{ + struct mem_section_usage *usage = READ_ONCE(ms->usage); + int idx = subsection_map_index(*pfn); + unsigned long bit; + + if (!usage) + return false; + + if (test_bit(idx, usage->subsection_map)) + return true; + + /* Find the next subsection that exists */ + bit = find_next_bit(usage->subsection_map, SUBSECTIONS_PER_SECTION, idx); + if (bit == SUBSECTIONS_PER_SECTION) + return false; + + *pfn = (*pfn & PAGE_SECTION_MASK) + (bit * PAGES_PER_SUBSECTION); + return true; +} #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { return 1; } + +static inline bool pfn_section_first_valid(struct mem_section *ms, unsigned long *pfn) +{ + return true; +} #endif +void sparse_init_early_section(int nid, struct page *map, unsigned long pnum, + unsigned long flags); + #ifndef CONFIG_HAVE_ARCH_PFN_VALID /** * pfn_valid - check if there is a valid memory map entry for a PFN @@ -2059,6 +2169,58 @@ static inline int pfn_valid(unsigned long pfn) return ret; } + +/* Returns end_pfn or higher if no valid PFN remaining in range */ +static inline unsigned long first_valid_pfn(unsigned long pfn, unsigned long end_pfn) +{ + unsigned long nr = pfn_to_section_nr(pfn); + + rcu_read_lock_sched(); + + while (nr <= __highest_present_section_nr && pfn < end_pfn) { + struct mem_section *ms = __pfn_to_section(pfn); + + if (valid_section(ms) && + (early_section(ms) || pfn_section_first_valid(ms, &pfn))) { + rcu_read_unlock_sched(); + return pfn; + } + + /* Nothing left in this section? Skip to next section */ + nr++; + pfn = section_nr_to_pfn(nr); + } + + rcu_read_unlock_sched(); + return end_pfn; +} + +static inline unsigned long next_valid_pfn(unsigned long pfn, unsigned long end_pfn) +{ + pfn++; + + if (pfn >= end_pfn) + return end_pfn; + + /* + * Either every PFN within the section (or subsection for VMEMMAP) is + * valid, or none of them are. So there's no point repeating the check + * for every PFN; only call first_valid_pfn() again when crossing a + * (sub)section boundary (i.e. !(pfn & ~PAGE_{SUB,}SECTION_MASK)). + */ + if (pfn & ~(IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP) ? + PAGE_SUBSECTION_MASK : PAGE_SECTION_MASK)) + return pfn; + + return first_valid_pfn(pfn, end_pfn); +} + + +#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \ + for ((_pfn) = first_valid_pfn((_start_pfn), (_end_pfn)); \ + (_pfn) < (_end_pfn); \ + (_pfn) = next_valid_pfn((_pfn), (_end_pfn))) + #endif static inline int pfn_in_present_section(unsigned long pfn) @@ -2078,6 +2240,11 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr) return -1; } +#define for_each_present_section_nr(start, section_nr) \ + for (section_nr = next_present_section_nr(start - 1); \ + section_nr != -1; \ + section_nr = next_present_section_nr(section_nr)) + /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate @@ -2097,10 +2264,22 @@ void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) +#define sparse_vmemmap_init_nid_early(_nid, _use) do {} while (0) +#define sparse_vmemmap_init_nid_late(_nid) do {} while (0) #define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ +/* + * Fallback case for when the architecture provides its own pfn_valid() but + * not a corresponding for_each_valid_pfn(). + */ +#ifndef for_each_valid_pfn +#define for_each_valid_pfn(_pfn, _start_pfn, _end_pfn) \ + for ((_pfn) = (_start_pfn); (_pfn) < (_end_pfn); (_pfn)++) \ + if (pfn_valid(_pfn)) +#endif + #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index b1b219bc3422..e71a6070a8f8 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); +static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) +{ + return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; +} + #ifdef CONFIG_MULTIUSER static inline uid_t __vfsuid_val(vfsuid_t uid) { diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 4338b1b4ac44..6077972e8b45 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -340,7 +340,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_LED_MAX 0x0f #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f -#define INPUT_DEVICE_ID_SW_MAX 0x10 +#define INPUT_DEVICE_ID_SW_MAX 0x11 #define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 @@ -601,7 +601,7 @@ struct dmi_system_id { #define DMI_MATCH(a, b) { .slot = a, .substr = b } #define DMI_EXACT_MATCH(a, b) { .slot = a, .substr = b, .exact_match = 1 } -#define PLATFORM_NAME_SIZE 20 +#define PLATFORM_NAME_SIZE 24 #define PLATFORM_MODULE_PREFIX "platform:" struct platform_device_id { @@ -692,6 +692,7 @@ struct x86_cpu_id { __u16 feature; /* bit index */ /* Solely for kernel-internal use: DO NOT EXPORT to userspace! */ __u16 flags; + __u8 type; kernel_ulong_t driver_data; }; @@ -700,7 +701,10 @@ struct x86_cpu_id { #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 #define X86_STEPPING_ANY 0 +#define X86_STEP_MIN 0 +#define X86_STEP_MAX 0xf #define X86_FEATURE_ANY 0 /* Same as FPU, you can't test for that */ +#define X86_CPU_TYPE_ANY 0 /* * Generic table type for matching CPU features. diff --git a/include/linux/module.h b/include/linux/module.h index b3a643435357..5faa1fb1f4b4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -52,9 +52,9 @@ struct module_kobject { struct module_attribute { struct attribute attr; - ssize_t (*show)(struct module_attribute *, struct module_kobject *, + ssize_t (*show)(const struct module_attribute *, struct module_kobject *, char *); - ssize_t (*store)(struct module_attribute *, struct module_kobject *, + ssize_t (*store)(const struct module_attribute *, struct module_kobject *, const char *, size_t count); void (*setup)(struct module *, const char *); int (*test)(struct module *); @@ -67,10 +67,10 @@ struct module_version_attribute { const char *version; }; -extern ssize_t __modver_version_show(struct module_attribute *, +extern ssize_t __modver_version_show(const struct module_attribute *, struct module_kobject *, char *); -extern struct module_attribute module_uevent; +extern const struct module_attribute module_uevent; /* These are either module local, or the kernel's dummy ones. */ extern int init_module(void); @@ -162,6 +162,8 @@ extern void cleanup_module(void); #define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ +struct module_kobject *lookup_or_create_module_kobject(const char *name); + /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) @@ -247,8 +249,8 @@ extern void cleanup_module(void); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ -extern typeof(name) __mod_device_table__##type##__##name \ - __attribute__ ((unused, alias(__stringify(name)))) +static typeof(name) __mod_device_table__##type##__##name \ + __attribute__ ((used, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) #endif @@ -275,7 +277,7 @@ extern typeof(name) __mod_device_table__##type##__##name \ #else #define MODULE_VERSION(_version) \ MODULE_INFO(version, _version); \ - static struct module_version_attribute __modver_attr \ + static const struct module_version_attribute __modver_attr \ __used __section("__modver") \ __aligned(__alignof__(struct module_version_attribute)) \ = { \ @@ -306,7 +308,10 @@ extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) /* modules using other modules: kdb wants to see this. */ struct module_use { @@ -367,7 +372,6 @@ enum mod_mem_type { struct module_memory { void *base; - void *rw_copy; bool is_rox; unsigned int size; @@ -430,7 +434,7 @@ struct module { /* Exported symbols */ const struct kernel_symbol *syms; - const s32 *crcs; + const u32 *crcs; unsigned int num_syms; #ifdef CONFIG_ARCH_USES_CFI_TRAPS @@ -448,7 +452,7 @@ struct module { /* GPL-only exported symbols. */ unsigned int num_gpl_syms; const struct kernel_symbol *gpl_syms; - const s32 *gpl_crcs; + const u32 *gpl_crcs; bool using_gplonly_symbols; #ifdef CONFIG_MODULE_SIG @@ -663,7 +667,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod) return within_module_init(addr, mod) || within_module_core(addr, mod); } -/* Search for module by name: must be in a RCU-sched critical section. */ +/* Search for module by name: must be in a RCU critical section. */ struct module *find_module(const char *name); extern void __noreturn __module_put_and_kthread_exit(struct module *mod, @@ -769,15 +773,7 @@ static inline bool is_livepatch_module(struct module *mod) void set_module_sig_enforced(void); -void *__module_writable_address(struct module *mod, void *loc); - -static inline void *module_writable_address(struct module *mod, void *loc) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod || - mod->state != MODULE_STATE_UNFORMED) - return loc; - return __module_writable_address(mod, loc); -} +void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data); #else /* !CONFIG_MODULES... */ @@ -887,9 +883,8 @@ static inline bool module_is_coming(struct module *mod) return false; } -static inline void *module_writable_address(struct module *mod, void *loc) +static inline void module_for_each_mod(int(*func)(struct module *mod, void *data), void *data) { - return loc; } #endif /* CONFIG_MODULES */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1f5507ba5a12..e395461d59e5 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod); -int module_post_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *mod); - #ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..110e9d09de24 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -282,10 +282,9 @@ struct kparam_array #define __moduleparam_const const #endif -/* This is the fundamental function for registering boot/module - parameters. */ +/* This is the fundamental function for registering boot/module parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ - /* Default value instead of permissions? */ \ + static_assert(sizeof(""prefix) - 1 <= MAX_PARAM_PREFIX_LEN); \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used __section("__param") \ diff --git a/include/linux/mount.h b/include/linux/mount.h index 04213d8ef837..1a508beba446 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,48 +22,52 @@ struct fs_context; struct file; struct path; -#define MNT_NOSUID 0x01 -#define MNT_NODEV 0x02 -#define MNT_NOEXEC 0x04 -#define MNT_NOATIME 0x08 -#define MNT_NODIRATIME 0x10 -#define MNT_RELATIME 0x20 -#define MNT_READONLY 0x40 /* does the user want this to be r/o? */ -#define MNT_NOSYMFOLLOW 0x80 - -#define MNT_SHRINKABLE 0x100 -#define MNT_WRITE_HOLD 0x200 - -#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -/* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. - */ -#define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ - | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY | MNT_NOSYMFOLLOW) -#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) - -#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) - -#define MNT_INTERNAL 0x4000 - -#define MNT_LOCK_ATIME 0x040000 -#define MNT_LOCK_NOEXEC 0x080000 -#define MNT_LOCK_NOSUID 0x100000 -#define MNT_LOCK_NODEV 0x200000 -#define MNT_LOCK_READONLY 0x400000 -#define MNT_LOCKED 0x800000 -#define MNT_DOOMED 0x1000000 -#define MNT_SYNC_UMOUNT 0x2000000 -#define MNT_MARKED 0x4000000 -#define MNT_UMOUNT 0x8000000 +enum mount_flags { + MNT_NOSUID = 0x01, + MNT_NODEV = 0x02, + MNT_NOEXEC = 0x04, + MNT_NOATIME = 0x08, + MNT_NODIRATIME = 0x10, + MNT_RELATIME = 0x20, + MNT_READONLY = 0x40, /* does the user want this to be r/o? */ + MNT_NOSYMFOLLOW = 0x80, + + MNT_SHRINKABLE = 0x100, + MNT_WRITE_HOLD = 0x200, + + MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ + MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ + + MNT_INTERNAL = 0x4000, + + MNT_LOCK_ATIME = 0x040000, + MNT_LOCK_NOEXEC = 0x080000, + MNT_LOCK_NOSUID = 0x100000, + MNT_LOCK_NODEV = 0x200000, + MNT_LOCK_READONLY = 0x400000, + MNT_LOCKED = 0x800000, + MNT_DOOMED = 0x1000000, + MNT_SYNC_UMOUNT = 0x2000000, + MNT_MARKED = 0x4000000, + MNT_UMOUNT = 0x8000000, + + /* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. + */ + MNT_SHARED_MASK = MNT_UNBINDABLE, + MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME + | MNT_READONLY | MNT_NOSYMFOLLOW, + MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, + + MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | + MNT_LOCKED, +}; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ @@ -75,7 +79,7 @@ struct vfsmount { static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt) { /* Pairs with smp_store_release() in do_idmap_mount(). */ - return smp_load_acquire(&mnt->mnt_idmap); + return READ_ONCE(mnt->mnt_idmap); } extern int mnt_want_write(struct vfsmount *mnt); @@ -98,9 +102,6 @@ extern struct vfsmount *vfs_create_mount(struct fs_context *fc); extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data); -extern struct vfsmount *vfs_submount(const struct dentry *mountpoint, - struct file_system_type *type, - const char *name, void *data); extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void mark_mounts_for_expiry(struct list_head *mounts); @@ -113,12 +114,10 @@ extern struct vfsmount *kern_mount(struct file_system_type *); extern void kern_unmount(struct vfsmount *mnt); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); -extern long do_mount(const char *, const char __user *, +int do_mount(const char *, const char __user *, const char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(const struct path *); -extern void drop_collected_mounts(struct vfsmount *); -extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, - struct vfsmount *); +extern struct path *collect_paths(const struct path *, struct path *, unsigned); +extern void drop_collected_paths(struct path *, struct path *); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); extern int cifs_root_data(char **dev, char **opts); diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 58a2401e4b55..0075f6e5c3da 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -262,6 +262,11 @@ struct mr_table { int mroute_reg_vif_num; }; +static inline bool mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net_initialized(net); +} + #ifdef CONFIG_IP_MROUTE_COMMON void vif_device_init(struct vif_device *v, struct net_device *dev, diff --git a/include/linux/msi.h b/include/linux/msi.h index b10093c4d00e..6863540f4b71 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -73,7 +73,6 @@ struct msi_msg { }; }; -extern int pci_msi_ignore_mask; /* Helper functions */ struct msi_desc; struct pci_dev; @@ -166,6 +165,10 @@ struct msi_desc_data { * @dev: Pointer to the device which uses this descriptor * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor + * @iommu_msi_iova: Optional shifted IOVA from the IOMMU to override the msi_addr. + * Only used if iommu_msi_shift != 0 + * @iommu_msi_shift: Indicates how many bits of the original address should be + * preserved when using iommu_msi_iova. * @sysfs_attr: Pointer to sysfs device attribute * * @write_msi_msg: Callback that may be called when the MSI message @@ -184,7 +187,8 @@ struct msi_desc { struct msi_msg msg; struct irq_affinity_desc *affinity; #ifdef CONFIG_IRQ_MSI_IOMMU - const void *iommu_cookie; + u64 iommu_msi_iova : 58; + u64 iommu_msi_shift : 6; #endif #ifdef CONFIG_SYSFS struct device_attribute *sysfs_attrs; @@ -225,8 +229,11 @@ struct msi_dev_domain { int msi_setup_device_data(struct device *dev); -void msi_lock_descs(struct device *dev); -void msi_unlock_descs(struct device *dev); +void __msi_lock_descs(struct device *dev); +void __msi_unlock_descs(struct device *dev); + +DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, __msi_lock_descs(_T->lock), + __msi_unlock_descs(_T->lock)); struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, enum msi_desc_filter filter); @@ -285,28 +292,42 @@ struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, #define msi_desc_to_dev(desc) ((desc)->dev) -#ifdef CONFIG_IRQ_MSI_IOMMU -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) +static inline void msi_desc_set_iommu_msi_iova(struct msi_desc *desc, u64 msi_iova, + unsigned int msi_shift) { - return desc->iommu_cookie; -} - -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, - const void *iommu_cookie) -{ - desc->iommu_cookie = iommu_cookie; -} -#else -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) -{ - return NULL; +#ifdef CONFIG_IRQ_MSI_IOMMU + desc->iommu_msi_iova = msi_iova >> msi_shift; + desc->iommu_msi_shift = msi_shift; +#endif } -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, - const void *iommu_cookie) +/** + * msi_msg_set_addr() - Set MSI address in an MSI message + * + * @desc: MSI descriptor that may carry an IOVA base address for MSI via @iommu_msi_iova/shift + * @msg: Target MSI message to set its address_hi and address_lo + * @msi_addr: Physical address to set the MSI message + * + * Notes: + * - Override @msi_addr using the IOVA base address in the @desc if @iommu_msi_shift is set + * - Otherwise, simply set @msi_addr to @msg + */ +static inline void msi_msg_set_addr(struct msi_desc *desc, struct msi_msg *msg, + phys_addr_t msi_addr) { -} +#ifdef CONFIG_IRQ_MSI_IOMMU + if (desc->iommu_msi_shift) { + u64 msi_iova = desc->iommu_msi_iova << desc->iommu_msi_shift; + + msg->address_hi = upper_32_bits(msi_iova); + msg->address_lo = lower_32_bits(msi_iova) | + (msi_addr & ((1 << desc->iommu_msi_shift) - 1)); + return; + } #endif + msg->address_hi = upper_32_bits(msi_addr); + msg->address_lo = lower_32_bits(msi_addr); +} int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, struct msi_desc *init_desc); @@ -402,6 +423,7 @@ struct msi_domain_info; * @msi_init: Domain specific init function for MSI interrupts * @msi_free: Domain specific function to free a MSI interrupts * @msi_prepare: Prepare the allocation of the interrupts in the domain + * @msi_teardown: Reverse the effects of @msi_prepare * @prepare_desc: Optional function to prepare the allocated MSI descriptor * in the domain * @set_desc: Set the msi descriptor for an interrupt @@ -417,8 +439,9 @@ struct msi_domain_info; * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare, @prepare_desc and @set_desc are callbacks used by the - * msi_domain_alloc/free_irqs*() variants. + * @msi_check, @msi_prepare, @msi_teardown, @prepare_desc and + * @set_desc are callbacks used by the msi_domain_alloc/free_irqs*() + * variants. * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). This @@ -440,6 +463,8 @@ struct msi_domain_ops { int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); + void (*msi_teardown)(struct irq_domain *domain, + msi_alloc_info_t *arg); void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg, struct msi_desc *desc); void (*set_desc)(msi_alloc_info_t *arg, @@ -468,6 +493,7 @@ struct msi_domain_ops { * @handler: Optional: associated interrupt flow handler * @handler_data: Optional: associated interrupt flow handler data * @handler_name: Optional: associated interrupt flow handler name + * @alloc_data: Optional: associated interrupt allocation data * @data: Optional: domain specific data */ struct msi_domain_info { @@ -480,6 +506,7 @@ struct msi_domain_info { irq_flow_handler_t handler; void *handler_data; const char *handler_name; + msi_alloc_info_t *alloc_data; void *data; }; @@ -489,12 +516,14 @@ struct msi_domain_info { * @chip: Interrupt chip for this domain * @ops: MSI domain ops * @info: MSI domain info data + * @alloc_info: MSI domain allocation data (architecture specific) */ struct msi_domain_template { char name[48]; struct irq_chip chip; struct msi_domain_ops ops; struct msi_domain_info info; + msi_alloc_info_t alloc_info; }; /* @@ -556,6 +585,16 @@ enum { MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), /* PCI MSIs cannot be steered separately to CPU cores */ MSI_FLAG_NO_AFFINITY = (1 << 21), + /* Inhibit usage of entry masking */ + MSI_FLAG_NO_MASK = (1 << 22), +}; + +/* + * Flags for msi_parent_ops::chip_flags + */ +enum { + MSI_CHIP_FLAG_SET_EOI = (1 << 0), + MSI_CHIP_FLAG_SET_ACK = (1 << 1), }; /** @@ -563,6 +602,8 @@ enum { * * @supported_flags: Required: The supported MSI flags of the parent domain * @required_flags: Optional: The required MSI flags of the parent MSI domain + * @chip_flags: Optional: Select MSI chip callbacks to update with defaults + * in msi_lib_init_dev_msi_info(). * @bus_select_token: Optional: The bus token of the real parent domain for * irq_domain::select() * @bus_select_mask: Optional: A mask of supported BUS_DOMAINs for @@ -575,6 +616,7 @@ enum { struct msi_parent_ops { u32 supported_flags; u32 required_flags; + u32 chip_flags; u32 bus_select_token; u32 bus_select_mask; const char *prefix; @@ -594,6 +636,10 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); +struct irq_domain_info; +struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info, + const struct msi_parent_ops *msi_parent_ops); + bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, const struct msi_domain_template *template, unsigned int hwsize, void *domain_data, diff --git a/include/linux/mtd/nand-qpic-common.h b/include/linux/mtd/nand-qpic-common.h new file mode 100644 index 000000000000..f0aa098a395f --- /dev/null +++ b/include/linux/mtd/nand-qpic-common.h @@ -0,0 +1,495 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * QCOM QPIC common APIs header file + * + * Copyright (c) 2023 Qualcomm Inc. + * Authors: Md sadre Alam <quic_mdalam@quicinc.com> + * + */ +#ifndef __MTD_NAND_QPIC_COMMON_H__ +#define __MTD_NAND_QPIC_COMMON_H__ + +/* NANDc reg offsets */ +#define NAND_FLASH_CMD 0x00 +#define NAND_ADDR0 0x04 +#define NAND_ADDR1 0x08 +#define NAND_FLASH_CHIP_SELECT 0x0c +#define NAND_EXEC_CMD 0x10 +#define NAND_FLASH_STATUS 0x14 +#define NAND_BUFFER_STATUS 0x18 +#define NAND_DEV0_CFG0 0x20 +#define NAND_DEV0_CFG1 0x24 +#define NAND_DEV0_ECC_CFG 0x28 +#define NAND_AUTO_STATUS_EN 0x2c +#define NAND_DEV1_CFG0 0x30 +#define NAND_DEV1_CFG1 0x34 +#define NAND_READ_ID 0x40 +#define NAND_READ_STATUS 0x44 +#define NAND_DEV_CMD0 0xa0 +#define NAND_DEV_CMD1 0xa4 +#define NAND_DEV_CMD2 0xa8 +#define NAND_DEV_CMD_VLD 0xac +#define SFLASHC_BURST_CFG 0xe0 +#define NAND_ERASED_CW_DETECT_CFG 0xe8 +#define NAND_ERASED_CW_DETECT_STATUS 0xec +#define NAND_EBI2_ECC_BUF_CFG 0xf0 +#define FLASH_BUF_ACC 0x100 + +#define NAND_CTRL 0xf00 +#define NAND_VERSION 0xf08 +#define NAND_READ_LOCATION_0 0xf20 +#define NAND_READ_LOCATION_1 0xf24 +#define NAND_READ_LOCATION_2 0xf28 +#define NAND_READ_LOCATION_3 0xf2c +#define NAND_READ_LOCATION_LAST_CW_0 0xf40 +#define NAND_READ_LOCATION_LAST_CW_1 0xf44 +#define NAND_READ_LOCATION_LAST_CW_2 0xf48 +#define NAND_READ_LOCATION_LAST_CW_3 0xf4c + +/* dummy register offsets, used by qcom_write_reg_dma */ +#define NAND_DEV_CMD1_RESTORE 0xdead +#define NAND_DEV_CMD_VLD_RESTORE 0xbeef + +/* NAND_FLASH_CMD bits */ +#define PAGE_ACC BIT(4) +#define LAST_PAGE BIT(5) + +/* NAND_FLASH_CHIP_SELECT bits */ +#define NAND_DEV_SEL 0 +#define DM_EN BIT(2) + +/* NAND_FLASH_STATUS bits */ +#define FS_OP_ERR BIT(4) +#define FS_READY_BSY_N BIT(5) +#define FS_MPU_ERR BIT(8) +#define FS_DEVICE_STS_ERR BIT(16) +#define FS_DEVICE_WP BIT(23) + +/* NAND_BUFFER_STATUS bits */ +#define BS_UNCORRECTABLE_BIT BIT(8) +#define BS_CORRECTABLE_ERR_MSK 0x1f + +/* NAND_DEVn_CFG0 bits */ +#define DISABLE_STATUS_AFTER_WRITE BIT(4) +#define CW_PER_PAGE 6 +#define CW_PER_PAGE_MASK GENMASK(8, 6) +#define UD_SIZE_BYTES 9 +#define UD_SIZE_BYTES_MASK GENMASK(18, 9) +#define ECC_PARITY_SIZE_BYTES_RS GENMASK(22, 19) +#define SPARE_SIZE_BYTES 23 +#define SPARE_SIZE_BYTES_MASK GENMASK(26, 23) +#define NUM_ADDR_CYCLES 27 +#define NUM_ADDR_CYCLES_MASK GENMASK(29, 27) +#define STATUS_BFR_READ BIT(30) +#define SET_RD_MODE_AFTER_STATUS BIT(31) + +/* NAND_DEVn_CFG0 bits */ +#define DEV0_CFG1_ECC_DISABLE BIT(0) +#define WIDE_FLASH BIT(1) +#define NAND_RECOVERY_CYCLES 2 +#define NAND_RECOVERY_CYCLES_MASK GENMASK(4, 2) +#define CS_ACTIVE_BSY BIT(5) +#define BAD_BLOCK_BYTE_NUM 6 +#define BAD_BLOCK_BYTE_NUM_MASK GENMASK(15, 6) +#define BAD_BLOCK_IN_SPARE_AREA BIT(16) +#define WR_RD_BSY_GAP 17 +#define WR_RD_BSY_GAP_MASK GENMASK(22, 17) +#define ENABLE_BCH_ECC BIT(27) + +/* NAND_DEV0_ECC_CFG bits */ +#define ECC_CFG_ECC_DISABLE BIT(0) +#define ECC_SW_RESET BIT(1) +#define ECC_MODE 4 +#define ECC_MODE_MASK GENMASK(5, 4) +#define ECC_PARITY_SIZE_BYTES_BCH 8 +#define ECC_PARITY_SIZE_BYTES_BCH_MASK GENMASK(12, 8) +#define ECC_NUM_DATA_BYTES 16 +#define ECC_NUM_DATA_BYTES_MASK GENMASK(25, 16) +#define ECC_FORCE_CLK_OPEN BIT(30) + +/* NAND_DEV_CMD1 bits */ +#define READ_ADDR_MASK GENMASK(7, 0) + +/* NAND_DEV_CMD_VLD bits */ +#define READ_START_VLD BIT(0) +#define READ_STOP_VLD BIT(1) +#define WRITE_START_VLD BIT(2) +#define ERASE_START_VLD BIT(3) +#define SEQ_READ_START_VLD BIT(4) + +/* NAND_EBI2_ECC_BUF_CFG bits */ +#define NUM_STEPS 0 +#define NUM_STEPS_MASK GENMASK(9, 0) + +/* NAND_ERASED_CW_DETECT_CFG bits */ +#define ERASED_CW_ECC_MASK 1 +#define AUTO_DETECT_RES 0 +#define MASK_ECC BIT(ERASED_CW_ECC_MASK) +#define RESET_ERASED_DET BIT(AUTO_DETECT_RES) +#define ACTIVE_ERASED_DET (0 << AUTO_DETECT_RES) +#define CLR_ERASED_PAGE_DET (RESET_ERASED_DET | MASK_ECC) +#define SET_ERASED_PAGE_DET (ACTIVE_ERASED_DET | MASK_ECC) + +/* NAND_ERASED_CW_DETECT_STATUS bits */ +#define PAGE_ALL_ERASED BIT(7) +#define CODEWORD_ALL_ERASED BIT(6) +#define PAGE_ERASED BIT(5) +#define CODEWORD_ERASED BIT(4) +#define ERASED_PAGE (PAGE_ALL_ERASED | PAGE_ERASED) +#define ERASED_CW (CODEWORD_ALL_ERASED | CODEWORD_ERASED) + +/* NAND_READ_LOCATION_n bits */ +#define READ_LOCATION_OFFSET 0 +#define READ_LOCATION_OFFSET_MASK GENMASK(9, 0) +#define READ_LOCATION_SIZE 16 +#define READ_LOCATION_SIZE_MASK GENMASK(25, 16) +#define READ_LOCATION_LAST 31 +#define READ_LOCATION_LAST_MASK BIT(31) + +/* Version Mask */ +#define NAND_VERSION_MAJOR_MASK 0xf0000000 +#define NAND_VERSION_MAJOR_SHIFT 28 +#define NAND_VERSION_MINOR_MASK 0x0fff0000 +#define NAND_VERSION_MINOR_SHIFT 16 + +/* NAND OP_CMDs */ +#define OP_PAGE_READ 0x2 +#define OP_PAGE_READ_WITH_ECC 0x3 +#define OP_PAGE_READ_WITH_ECC_SPARE 0x4 +#define OP_PAGE_READ_ONFI_READ 0x5 +#define OP_PROGRAM_PAGE 0x6 +#define OP_PAGE_PROGRAM_WITH_ECC 0x7 +#define OP_PROGRAM_PAGE_SPARE 0x9 +#define OP_BLOCK_ERASE 0xa +#define OP_CHECK_STATUS 0xc +#define OP_FETCH_ID 0xb +#define OP_RESET_DEVICE 0xd + +/* Default Value for NAND_DEV_CMD_VLD */ +#define NAND_DEV_CMD_VLD_VAL (READ_START_VLD | WRITE_START_VLD | \ + ERASE_START_VLD | SEQ_READ_START_VLD) + +/* NAND_CTRL bits */ +#define BAM_MODE_EN BIT(0) + +/* + * the NAND controller performs reads/writes with ECC in 516 byte chunks. + * the driver calls the chunks 'step' or 'codeword' interchangeably + */ +#define NANDC_STEP_SIZE 512 + +/* + * the largest page size we support is 8K, this will have 16 steps/codewords + * of 512 bytes each + */ +#define MAX_NUM_STEPS (SZ_8K / NANDC_STEP_SIZE) + +/* we read at most 3 registers per codeword scan */ +#define MAX_REG_RD (3 * MAX_NUM_STEPS) + +/* ECC modes supported by the controller */ +#define ECC_NONE BIT(0) +#define ECC_RS_4BIT BIT(1) +#define ECC_BCH_4BIT BIT(2) +#define ECC_BCH_8BIT BIT(3) + +/* + * Returns the actual register address for all NAND_DEV_ registers + * (i.e. NAND_DEV_CMD0, NAND_DEV_CMD1, NAND_DEV_CMD2 and NAND_DEV_CMD_VLD) + */ +#define dev_cmd_reg_addr(nandc, reg) ((nandc)->props->dev_cmd_reg_start + (reg)) + +/* Returns the dma address for reg read buffer */ +#define reg_buf_dma_addr(chip, vaddr) \ + ((chip)->reg_read_dma + \ + ((u8 *)(vaddr) - (u8 *)(chip)->reg_read_buf)) + +#define QPIC_PER_CW_CMD_ELEMENTS 32 +#define QPIC_PER_CW_CMD_SGL 32 +#define QPIC_PER_CW_DATA_SGL 8 + +#define QPIC_NAND_COMPLETION_TIMEOUT msecs_to_jiffies(2000) + +/* + * Flags used in DMA descriptor preparation helper functions + * (i.e. qcom_read_reg_dma/qcom_write_reg_dma/qcom_read_data_dma/qcom_write_data_dma) + */ +/* Don't set the EOT in current tx BAM sgl */ +#define NAND_BAM_NO_EOT BIT(0) +/* Set the NWD flag in current BAM sgl */ +#define NAND_BAM_NWD BIT(1) +/* Finish writing in the current BAM sgl and start writing in another BAM sgl */ +#define NAND_BAM_NEXT_SGL BIT(2) +/* + * Erased codeword status is being used two times in single transfer so this + * flag will determine the current value of erased codeword status register + */ +#define NAND_ERASED_CW_SET BIT(4) + +#define MAX_ADDRESS_CYCLE 5 + +/* + * This data type corresponds to the BAM transaction which will be used for all + * NAND transfers. + * @bam_ce - the array of BAM command elements + * @cmd_sgl - sgl for NAND BAM command pipe + * @data_sgl - sgl for NAND BAM consumer/producer pipe + * @last_data_desc - last DMA desc in data channel (tx/rx). + * @last_cmd_desc - last DMA desc in command channel. + * @txn_done - completion for NAND transfer. + * @bam_ce_nitems - the number of elements in the @bam_ce array + * @cmd_sgl_nitems - the number of elements in the @cmd_sgl array + * @data_sgl_nitems - the number of elements in the @data_sgl array + * @bam_ce_pos - the index in bam_ce which is available for next sgl + * @bam_ce_start - the index in bam_ce which marks the start position ce + * for current sgl. It will be used for size calculation + * for current sgl + * @cmd_sgl_pos - current index in command sgl. + * @cmd_sgl_start - start index in command sgl. + * @tx_sgl_pos - current index in data sgl for tx. + * @tx_sgl_start - start index in data sgl for tx. + * @rx_sgl_pos - current index in data sgl for rx. + * @rx_sgl_start - start index in data sgl for rx. + */ +struct bam_transaction { + struct bam_cmd_element *bam_ce; + struct scatterlist *cmd_sgl; + struct scatterlist *data_sgl; + struct dma_async_tx_descriptor *last_data_desc; + struct dma_async_tx_descriptor *last_cmd_desc; + struct completion txn_done; + + unsigned int bam_ce_nitems; + unsigned int cmd_sgl_nitems; + unsigned int data_sgl_nitems; + + struct_group(bam_positions, + u32 bam_ce_pos; + u32 bam_ce_start; + u32 cmd_sgl_pos; + u32 cmd_sgl_start; + u32 tx_sgl_pos; + u32 tx_sgl_start; + u32 rx_sgl_pos; + u32 rx_sgl_start; + + ); +}; + +/* + * This data type corresponds to the nand dma descriptor + * @dma_desc - low level DMA engine descriptor + * @list - list for desc_info + * + * @adm_sgl - sgl which will be used for single sgl dma descriptor. Only used by + * ADM + * @bam_sgl - sgl which will be used for dma descriptor. Only used by BAM + * @sgl_cnt - number of SGL in bam_sgl. Only used by BAM + * @dir - DMA transfer direction + */ +struct desc_info { + struct dma_async_tx_descriptor *dma_desc; + struct list_head node; + + union { + struct scatterlist adm_sgl; + struct { + struct scatterlist *bam_sgl; + int sgl_cnt; + }; + }; + enum dma_data_direction dir; +}; + +/* + * holds the current register values that we want to write. acts as a contiguous + * chunk of memory which we use to write the controller registers through DMA. + */ +struct nandc_regs { + __le32 cmd; + __le32 addr0; + __le32 addr1; + __le32 chip_sel; + __le32 exec; + + __le32 cfg0; + __le32 cfg1; + __le32 ecc_bch_cfg; + + __le32 clrflashstatus; + __le32 clrreadstatus; + + __le32 cmd1; + __le32 vld; + + __le32 orig_cmd1; + __le32 orig_vld; + + __le32 ecc_buf_cfg; + __le32 read_location0; + __le32 read_location1; + __le32 read_location2; + __le32 read_location3; + __le32 read_location_last0; + __le32 read_location_last1; + __le32 read_location_last2; + __le32 read_location_last3; + __le32 spi_cfg; + __le32 num_addr_cycle; + __le32 busy_wait_cnt; + __le32 flash_feature; + + __le32 erased_cw_detect_cfg_clr; + __le32 erased_cw_detect_cfg_set; +}; + +/* + * NAND controller data struct + * + * @dev: parent device + * + * @base: MMIO base + * + * @core_clk: controller clock + * @aon_clk: another controller clock + * @iomacro_clk: io macro clock + * + * @regs: a contiguous chunk of memory for DMA register + * writes. contains the register values to be + * written to controller + * + * @props: properties of current NAND controller, + * initialized via DT match data + * + * @controller: base controller structure + * @qspi: qpic spi structure + * @host_list: list containing all the chips attached to the + * controller + * + * @chan: dma channel + * @cmd_crci: ADM DMA CRCI for command flow control + * @data_crci: ADM DMA CRCI for data flow control + * + * @desc_list: DMA descriptor list (list of desc_infos) + * + * @data_buffer: our local DMA buffer for page read/writes, + * used when we can't use the buffer provided + * by upper layers directly + * @reg_read_buf: local buffer for reading back registers via DMA + * + * @base_phys: physical base address of controller registers + * @base_dma: dma base address of controller registers + * @reg_read_dma: contains dma address for register read buffer + * + * @buf_size/count/start: markers for chip->legacy.read_buf/write_buf + * functions + * @max_cwperpage: maximum QPIC codewords required. calculated + * from all connected NAND devices pagesize + * + * @reg_read_pos: marker for data read in reg_read_buf + * + * @cmd1/vld: some fixed controller register values + * + * @exec_opwrite: flag to select correct number of code word + * while reading status + */ +struct qcom_nand_controller { + struct device *dev; + + void __iomem *base; + + struct clk *core_clk; + struct clk *aon_clk; + + struct nandc_regs *regs; + struct bam_transaction *bam_txn; + + const struct qcom_nandc_props *props; + + struct nand_controller *controller; + struct qpic_spi_nand *qspi; + struct list_head host_list; + + union { + /* will be used only by QPIC for BAM DMA */ + struct { + struct dma_chan *tx_chan; + struct dma_chan *rx_chan; + struct dma_chan *cmd_chan; + }; + + /* will be used only by EBI2 for ADM DMA */ + struct { + struct dma_chan *chan; + unsigned int cmd_crci; + unsigned int data_crci; + }; + }; + + struct list_head desc_list; + + u8 *data_buffer; + __le32 *reg_read_buf; + + phys_addr_t base_phys; + dma_addr_t base_dma; + dma_addr_t reg_read_dma; + + int buf_size; + int buf_count; + int buf_start; + unsigned int max_cwperpage; + + int reg_read_pos; + + u32 cmd1, vld; + bool exec_opwrite; +}; + +/* + * This data type corresponds to the NAND controller properties which varies + * among different NAND controllers. + * @ecc_modes - ecc mode for NAND + * @dev_cmd_reg_start - NAND_DEV_CMD_* registers starting offset + * @supports_bam - whether NAND controller is using BAM + * @nandc_part_of_qpic - whether NAND controller is part of qpic IP + * @qpic_version2 - flag to indicate QPIC IP version 2 + * @use_codeword_fixup - whether NAND has different layout for boot partitions + */ +struct qcom_nandc_props { + u32 ecc_modes; + u32 dev_cmd_reg_start; + u32 bam_offset; + bool supports_bam; + bool nandc_part_of_qpic; + bool qpic_version2; + bool use_codeword_fixup; +}; + +void qcom_free_bam_transaction(struct qcom_nand_controller *nandc); +struct bam_transaction *qcom_alloc_bam_transaction(struct qcom_nand_controller *nandc); +void qcom_clear_bam_transaction(struct qcom_nand_controller *nandc); +void qcom_qpic_bam_dma_done(void *data); +void qcom_nandc_dev_to_mem(struct qcom_nand_controller *nandc, bool is_cpu); +int qcom_prepare_bam_async_desc(struct qcom_nand_controller *nandc, + struct dma_chan *chan, unsigned long flags); +int qcom_prep_bam_dma_desc_cmd(struct qcom_nand_controller *nandc, bool read, + int reg_off, const void *vaddr, int size, unsigned int flags); +int qcom_prep_bam_dma_desc_data(struct qcom_nand_controller *nandc, bool read, + const void *vaddr, int size, unsigned int flags); +int qcom_prep_adm_dma_desc(struct qcom_nand_controller *nandc, bool read, int reg_off, + const void *vaddr, int size, bool flow_control); +int qcom_read_reg_dma(struct qcom_nand_controller *nandc, int first, int num_regs, + unsigned int flags); +int qcom_write_reg_dma(struct qcom_nand_controller *nandc, __le32 *vaddr, int first, + int num_regs, unsigned int flags); +int qcom_read_data_dma(struct qcom_nand_controller *nandc, int reg_off, const u8 *vaddr, + int size, unsigned int flags); +int qcom_write_data_dma(struct qcom_nand_controller *nandc, int reg_off, const u8 *vaddr, + int size, unsigned int flags); +int qcom_submit_descs(struct qcom_nand_controller *nandc); +void qcom_clear_read_regs(struct qcom_nand_controller *nandc); +void qcom_nandc_unalloc(struct qcom_nand_controller *nandc); +int qcom_nandc_alloc(struct qcom_nand_controller *nandc); +#endif + diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 0e2f228e8b4a..07486168d104 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -21,7 +21,7 @@ struct nand_device; * @oobsize: OOB area size * @pages_per_eraseblock: number of pages per eraseblock * @eraseblocks_per_lun: number of eraseblocks per LUN (Logical Unit Number) - * @max_bad_eraseblocks_per_lun: maximum number of eraseblocks per LUN + * @max_bad_eraseblocks_per_lun: maximum number of bad eraseblocks per LUN * @planes_per_lun: number of planes per LUN * @luns_per_target: number of LUN per target (target is a synonym for die) * @ntargets: total number of targets exposed by the NAND device diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 702e5fb13dae..15eaa09da998 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -20,126 +20,209 @@ * Standard SPI NAND flash operations */ -#define SPINAND_RESET_OP \ +#define SPINAND_RESET_1S_0_0_OP \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xff, 1), \ SPI_MEM_OP_NO_ADDR, \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_WR_EN_DIS_OP(enable) \ +#define SPINAND_WR_EN_DIS_1S_0_0_OP(enable) \ SPI_MEM_OP(SPI_MEM_OP_CMD((enable) ? 0x06 : 0x04, 1), \ SPI_MEM_OP_NO_ADDR, \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_READID_OP(naddr, ndummy, buf, len) \ +#define SPINAND_READID_1S_1S_1S_OP(naddr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1), \ SPI_MEM_OP_ADDR(naddr, 0, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_SET_FEATURE_OP(reg, valptr) \ +#define SPINAND_SET_FEATURE_1S_1S_1S_OP(reg, valptr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x1f, 1), \ SPI_MEM_OP_ADDR(1, reg, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(1, valptr, 1)) -#define SPINAND_GET_FEATURE_OP(reg, valptr) \ +#define SPINAND_GET_FEATURE_1S_1S_1S_OP(reg, valptr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x0f, 1), \ SPI_MEM_OP_ADDR(1, reg, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_IN(1, valptr, 1)) -#define SPINAND_BLK_ERASE_OP(addr) \ +#define SPINAND_BLK_ERASE_1S_1S_0_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xd8, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PAGE_READ_OP(addr) \ +#define SPINAND_PAGE_READ_1S_1S_0_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x13, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PAGE_READ_FROM_CACHE_OP(fast, addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1), \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(addr, ndummy, buf, len, ...) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 1)) + +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1), \ + SPI_MEM_OP_ADDR(3, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_PAGE_READ_FROM_CACHE_OP_3A(fast, addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(fast ? 0x0b : 0x03, 1), \ +#define SPINAND_PAGE_READ_FROM_CACHE_FAST_3A_1S_1S_1S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 1)) -#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x0d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 1), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 2)) -#define SPINAND_PAGE_READ_FROM_CACHE_X2_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x3b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 2)) -#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x3d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) + +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ + SPI_MEM_OP_ADDR(3, addr, 2), \ + SPI_MEM_OP_DUMMY(ndummy, 2), \ + SPI_MEM_OP_DATA_IN(len, buf, 2)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xbd, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 2), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 2), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_X4_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_1S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_DUMMY(ndummy, 1), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ - SPI_MEM_OP_ADDR(2, addr, 2), \ - SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) - -#define SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP_3A(addr, ndummy, buf, len) \ - SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ - SPI_MEM_OP_ADDR(3, addr, 2), \ - SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x6d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) -#define SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP_3A(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(3, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ SPI_MEM_OP_DATA_IN(len, buf, 4)) -#define SPINAND_PROG_EXEC_OP(addr) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xed, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 4), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 4), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x8b, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_DUMMY(ndummy, 1), \ + SPI_MEM_OP_DATA_IN(len, buf, 8), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0xcb, 1), \ + SPI_MEM_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_DUMMY(ndummy, 8), \ + SPI_MEM_OP_DATA_IN(len, buf, 8), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PAGE_READ_FROM_CACHE_1S_1D_8D_OP(addr, ndummy, buf, len, freq) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x9d, 1), \ + SPI_MEM_DTR_OP_ADDR(2, addr, 1), \ + SPI_MEM_DTR_OP_DUMMY(ndummy, 1), \ + SPI_MEM_DTR_OP_DATA_IN(len, buf, 8), \ + SPI_MEM_OP_MAX_FREQ(freq)) + +#define SPINAND_PROG_EXEC_1S_1S_0_OP(addr) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0x10, 1), \ SPI_MEM_OP_ADDR(3, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_NO_DATA) -#define SPINAND_PROG_LOAD(reset, addr, buf, len) \ +#define SPINAND_PROG_LOAD_1S_1S_1S_OP(reset, addr, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x02 : 0x84, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(len, buf, 1)) -#define SPINAND_PROG_LOAD_X4(reset, addr, buf, len) \ +#define SPINAND_PROG_LOAD_1S_1S_4S_OP(reset, addr, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0x32 : 0x34, 1), \ SPI_MEM_OP_ADDR(2, addr, 1), \ SPI_MEM_OP_NO_DUMMY, \ SPI_MEM_OP_DATA_OUT(len, buf, 4)) +#define SPINAND_PROG_LOAD_1S_1S_8S_OP(addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(0x82, 1), \ + SPI_MEM_OP_ADDR(2, addr, 1), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 8)) + +#define SPINAND_PROG_LOAD_1S_8S_8S_OP(reset, addr, buf, len) \ + SPI_MEM_OP(SPI_MEM_OP_CMD(reset ? 0xc2 : 0xc4, 1), \ + SPI_MEM_OP_ADDR(2, addr, 8), \ + SPI_MEM_OP_NO_DUMMY, \ + SPI_MEM_OP_DATA_OUT(len, buf, 8)) + /** * Standard SPI NAND flash commands */ @@ -268,6 +351,7 @@ extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; extern const struct spinand_manufacturer paragon_spinand_manufacturer; +extern const struct spinand_manufacturer skyhigh_spinand_manufacturer; extern const struct spinand_manufacturer toshiba_spinand_manufacturer; extern const struct spinand_manufacturer winbond_spinand_manufacturer; extern const struct spinand_manufacturer xtx_spinand_manufacturer; @@ -314,6 +398,7 @@ struct spinand_ecc_info { #define SPINAND_HAS_CR_FEAT_BIT BIT(1) #define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) #define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) +#define SPINAND_NO_RAW_ACCESS BIT(4) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure @@ -325,6 +410,67 @@ struct spinand_ondie_ecc_conf { }; /** + * struct spinand_otp_layout - structure to describe the SPI NAND OTP area + * @npages: number of pages in the OTP + * @start_page: start page of the user/factory OTP area. + */ +struct spinand_otp_layout { + unsigned int npages; + unsigned int start_page; +}; + +/** + * struct spinand_fact_otp_ops - SPI NAND OTP methods for factory area + * @info: get the OTP area information + * @read: read from the SPI NAND OTP area + */ +struct spinand_fact_otp_ops { + int (*info)(struct spinand_device *spinand, size_t len, + struct otp_info *buf, size_t *retlen); + int (*read)(struct spinand_device *spinand, loff_t from, size_t len, + size_t *retlen, u8 *buf); +}; + +/** + * struct spinand_user_otp_ops - SPI NAND OTP methods for user area + * @info: get the OTP area information + * @lock: lock an OTP region + * @erase: erase an OTP region + * @read: read from the SPI NAND OTP area + * @write: write to the SPI NAND OTP area + */ +struct spinand_user_otp_ops { + int (*info)(struct spinand_device *spinand, size_t len, + struct otp_info *buf, size_t *retlen); + int (*lock)(struct spinand_device *spinand, loff_t from, size_t len); + int (*erase)(struct spinand_device *spinand, loff_t from, size_t len); + int (*read)(struct spinand_device *spinand, loff_t from, size_t len, + size_t *retlen, u8 *buf); + int (*write)(struct spinand_device *spinand, loff_t from, size_t len, + size_t *retlen, const u8 *buf); +}; + +/** + * struct spinand_fact_otp - SPI NAND OTP grouping structure for factory area + * @layout: OTP region layout + * @ops: OTP access ops + */ +struct spinand_fact_otp { + const struct spinand_otp_layout layout; + const struct spinand_fact_otp_ops *ops; +}; + +/** + * struct spinand_user_otp - SPI NAND OTP grouping structure for user area + * @layout: OTP region layout + * @ops: OTP access ops + */ +struct spinand_user_otp { + const struct spinand_otp_layout layout; + const struct spinand_user_otp_ops *ops; +}; + +/** * struct spinand_info - Structure used to describe SPI NAND chips * @model: model name * @devid: device ID @@ -339,6 +485,10 @@ struct spinand_ondie_ecc_conf { * @select_target: function used to select a target/die. Required only for * multi-die chips * @set_cont_read: enable/disable continuous cached reads + * @fact_otp: SPI NAND factory OTP info. + * @user_otp: SPI NAND user OTP info. + * @read_retries: the number of read retry modes supported + * @set_read_retry: enable/disable read retry for data recovery * * Each SPI NAND manufacturer driver should have a spinand_info table * describing all the chips supported by the driver. @@ -359,6 +509,11 @@ struct spinand_info { unsigned int target); int (*set_cont_read)(struct spinand_device *spinand, bool enable); + struct spinand_fact_otp fact_otp; + struct spinand_user_otp user_otp; + unsigned int read_retries; + int (*set_read_retry)(struct spinand_device *spinand, + unsigned int read_retry); }; #define SPINAND_ID(__method, ...) \ @@ -382,10 +537,32 @@ struct spinand_info { } #define SPINAND_SELECT_TARGET(__func) \ - .select_target = __func, + .select_target = __func #define SPINAND_CONT_READ(__set_cont_read) \ - .set_cont_read = __set_cont_read, + .set_cont_read = __set_cont_read + +#define SPINAND_FACT_OTP_INFO(__npages, __start_page, __ops) \ + .fact_otp = { \ + .layout = { \ + .npages = __npages, \ + .start_page = __start_page, \ + }, \ + .ops = __ops, \ + } + +#define SPINAND_USER_OTP_INFO(__npages, __start_page, __ops) \ + .user_otp = { \ + .layout = { \ + .npages = __npages, \ + .start_page = __start_page, \ + }, \ + .ops = __ops, \ + } + +#define SPINAND_READ_RETRY(__read_retries, __set_read_retry) \ + .read_retries = __read_retries, \ + .set_read_retry = __set_read_retry #define SPINAND_INFO(__model, __id, __memorg, __eccreq, __op_variants, \ __flags, ...) \ @@ -437,6 +614,10 @@ struct spinand_dirmap { * actually relevant to enable this feature. * @set_cont_read: Enable/disable the continuous read feature * @priv: manufacturer private data + * @fact_otp: SPI NAND factory OTP info. + * @user_otp: SPI NAND user OTP info. + * @read_retries: the number of read retry modes supported + * @set_read_retry: Enable/disable the read retry feature */ struct spinand_device { struct nand_device base; @@ -469,6 +650,13 @@ struct spinand_device { bool cont_read_possible; int (*set_cont_read)(struct spinand_device *spinand, bool enable); + + const struct spinand_fact_otp *fact_otp; + const struct spinand_user_otp *user_otp; + + unsigned int read_retries; + int (*set_read_retry)(struct spinand_device *spinand, + unsigned int retry_mode); }; /** @@ -538,4 +726,26 @@ int spinand_upd_cfg(struct spinand_device *spinand, u8 mask, u8 val); int spinand_write_reg_op(struct spinand_device *spinand, u8 reg, u8 val); int spinand_select_target(struct spinand_device *spinand, unsigned int target); +int spinand_wait(struct spinand_device *spinand, unsigned long initial_delay_us, + unsigned long poll_delay_us, u8 *s); + +int spinand_read_page(struct spinand_device *spinand, + const struct nand_page_io_req *req); + +int spinand_write_page(struct spinand_device *spinand, + const struct nand_page_io_req *req); + +size_t spinand_otp_page_size(struct spinand_device *spinand); +size_t spinand_fact_otp_size(struct spinand_device *spinand); +size_t spinand_user_otp_size(struct spinand_device *spinand); + +int spinand_fact_otp_read(struct spinand_device *spinand, loff_t ofs, + size_t len, size_t *retlen, u8 *buf); +int spinand_user_otp_read(struct spinand_device *spinand, loff_t ofs, + size_t len, size_t *retlen, u8 *buf); +int spinand_user_otp_write(struct spinand_device *spinand, loff_t ofs, + size_t len, size_t *retlen, const u8 *buf); + +int spinand_set_mtd_otp_ops(struct spinand_device *spinand); + #endif /* __LINUX_MTD_SPINAND_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2bf91b57591b..a039fa8c1780 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -156,16 +156,15 @@ static inline int __devm_mutex_init(struct device *dev, struct mutex *lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); - extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); -extern int __must_check mutex_lock_killable_nested(struct mutex *lock, - unsigned int subclass); +extern int __must_check _mutex_lock_killable(struct mutex *lock, + unsigned int subclass, struct lockdep_map *nest_lock); extern void mutex_lock_io_nested(struct mutex *lock, unsigned int subclass); #define mutex_lock(lock) mutex_lock_nested(lock, 0) #define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) -#define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0) +#define mutex_lock_killable(lock) _mutex_lock_killable(lock, 0, NULL) #define mutex_lock_io(lock) mutex_lock_io_nested(lock, 0) #define mutex_lock_nest_lock(lock, nest_lock) \ @@ -174,6 +173,15 @@ do { \ _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) +#define mutex_lock_killable_nest_lock(lock, nest_lock) \ +( \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \ + _mutex_lock_killable(lock, 0, &(nest_lock)->dep_map) \ +) + +#define mutex_lock_killable_nested(lock, subclass) \ + _mutex_lock_killable(lock, subclass, NULL) + #else extern void mutex_lock(struct mutex *lock); extern int __must_check mutex_lock_interruptible(struct mutex *lock); @@ -183,6 +191,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) +# define mutex_lock_killable_nest_lock(lock, nest_lock) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) # define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) #endif @@ -193,7 +202,22 @@ extern void mutex_lock_io(struct mutex *lock); * * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern int _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); + +#define mutex_trylock_nest_lock(lock, nest_lock) \ +( \ + typecheck(struct lockdep_map *, &(nest_lock)->dep_map), \ + _mutex_trylock_nest_lock(lock, &(nest_lock)->dep_map) \ +) + +#define mutex_trylock(lock) _mutex_trylock_nest_lock(lock, NULL) +#else extern int mutex_trylock(struct mutex *lock); +#define mutex_trylock_nest_lock(lock, nest_lock) mutex_trylock(lock) +#endif + extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); @@ -202,4 +226,6 @@ DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) +extern unsigned long mutex_get_owner(struct mutex *lock); + #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/namei.h b/include/linux/namei.h index 8ec8fed3bce8..5d085428e471 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -18,35 +18,36 @@ enum { MAX_NESTED_LINKS = 8 }; enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; /* pathwalk mode */ -#define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ -#define LOOKUP_DIRECTORY 0x0002 /* require a directory */ -#define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ -#define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ -#define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ -#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */ - -#define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ -#define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ +#define LOOKUP_FOLLOW BIT(0) /* follow links at the end */ +#define LOOKUP_DIRECTORY BIT(1) /* require a directory */ +#define LOOKUP_AUTOMOUNT BIT(2) /* force terminal automount */ +#define LOOKUP_EMPTY BIT(3) /* accept empty path [user_... only] */ +#define LOOKUP_LINKAT_EMPTY BIT(4) /* Linkat request with empty path. */ +#define LOOKUP_DOWN BIT(5) /* follow mounts in the starting point */ +#define LOOKUP_MOUNTPOINT BIT(6) /* follow mounts in the end */ +#define LOOKUP_REVAL BIT(7) /* tell ->d_revalidate() to trust no cache */ +#define LOOKUP_RCU BIT(8) /* RCU pathwalk mode; semi-internal */ +#define LOOKUP_CACHED BIT(9) /* Only do cached lookup */ +#define LOOKUP_PARENT BIT(10) /* Looking up final parent in path */ +/* 5 spare bits for pathwalk */ /* These tell filesystem methods that we are dealing with the final component... */ -#define LOOKUP_OPEN 0x0100 /* ... in open */ -#define LOOKUP_CREATE 0x0200 /* ... in object creation */ -#define LOOKUP_EXCL 0x0400 /* ... in exclusive creation */ -#define LOOKUP_RENAME_TARGET 0x0800 /* ... in destination of rename() */ +#define LOOKUP_OPEN BIT(16) /* ... in open */ +#define LOOKUP_CREATE BIT(17) /* ... in object creation */ +#define LOOKUP_EXCL BIT(18) /* ... in target must not exist */ +#define LOOKUP_RENAME_TARGET BIT(19) /* ... in destination of rename() */ -/* internal use only */ -#define LOOKUP_PARENT 0x0010 +/* 4 spare bits for intent */ /* Scoping flags for lookup. */ -#define LOOKUP_NO_SYMLINKS 0x010000 /* No symlink crossing. */ -#define LOOKUP_NO_MAGICLINKS 0x020000 /* No nd_jump_link() crossing. */ -#define LOOKUP_NO_XDEV 0x040000 /* No mountpoint crossing. */ -#define LOOKUP_BENEATH 0x080000 /* No escaping from starting point. */ -#define LOOKUP_IN_ROOT 0x100000 /* Treat dirfd as fs root. */ -#define LOOKUP_CACHED 0x200000 /* Only do cached lookup */ -#define LOOKUP_LINKAT_EMPTY 0x400000 /* Linkat request with empty path. */ +#define LOOKUP_NO_SYMLINKS BIT(24) /* No symlink crossing. */ +#define LOOKUP_NO_MAGICLINKS BIT(25) /* No nd_jump_link() crossing. */ +#define LOOKUP_NO_XDEV BIT(26) /* No mountpoint crossing. */ +#define LOOKUP_BENEATH BIT(27) /* No escaping from starting point. */ +#define LOOKUP_IN_ROOT BIT(28) /* Treat dirfd as fs root. */ /* LOOKUP_* flags which do scope-related checks based on the dirfd. */ #define LOOKUP_IS_SCOPED (LOOKUP_BENEATH | LOOKUP_IN_ROOT) +/* 3 spare bits for scoping */ extern int path_pts(struct path *path); @@ -61,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, @@ -68,17 +70,16 @@ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); -extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); -struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int); +extern struct dentry *try_lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_unlocked(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_positive_unlocked(struct qstr *, struct dentry *); +struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, - const char *name, struct dentry *base, - int len); + struct qstr *name, struct dentry *base); struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, - const char *name, - struct dentry *base, int len); + struct qstr *name, + struct dentry *base); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); diff --git a/include/linux/net.h b/include/linux/net.h index b75bc534c1b3..ec09620f40f7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -36,14 +36,13 @@ struct net; * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. * Eventually all flags will be in sk->sk_wq->flags. */ -#define SOCKWQ_ASYNC_NOSPACE 0 -#define SOCKWQ_ASYNC_WAITDATA 1 -#define SOCK_NOSPACE 2 -#define SOCK_PASSCRED 3 -#define SOCK_PASSSEC 4 -#define SOCK_SUPPORT_ZC 5 -#define SOCK_CUSTOM_SOCKOPT 6 -#define SOCK_PASSPIDFD 7 +enum socket_flags { + SOCKWQ_ASYNC_NOSPACE, + SOCKWQ_ASYNC_WAITDATA, + SOCK_NOSPACE, + SOCK_SUPPORT_ZC, + SOCK_CUSTOM_SOCKOPT, +}; #ifndef ARCH_HAS_SOCKET_TYPES /** @@ -70,6 +69,7 @@ enum sock_type { SOCK_DCCP = 6, SOCK_PACKET = 10, }; +#endif /* ARCH_HAS_SOCKET_TYPES */ #define SOCK_MAX (SOCK_PACKET + 1) /* Mask which covers at least up to SOCK_MASK-1. The @@ -81,8 +81,7 @@ enum sock_type { #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif - -#endif /* ARCH_HAS_SOCKET_TYPES */ +#define SOCK_COREDUMP O_NOCTTY /** * enum sock_shutdown_cmd - Shutdown types @@ -343,8 +342,6 @@ static inline bool sendpages_ok(struct page *page, size_t len, size_t offset) int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); -int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, - struct kvec *vec, size_t num, size_t len); int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); diff --git a/include/linux/net/intel/iidc.h b/include/linux/net/intel/iidc.h deleted file mode 100644 index 1c1332e4df26..000000000000 --- a/include/linux/net/intel/iidc.h +++ /dev/null @@ -1,107 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2021, Intel Corporation. */ - -#ifndef _IIDC_H_ -#define _IIDC_H_ - -#include <linux/auxiliary_bus.h> -#include <linux/dcbnl.h> -#include <linux/device.h> -#include <linux/if_ether.h> -#include <linux/kernel.h> -#include <linux/netdevice.h> - -enum iidc_event_type { - IIDC_EVENT_BEFORE_MTU_CHANGE, - IIDC_EVENT_AFTER_MTU_CHANGE, - IIDC_EVENT_BEFORE_TC_CHANGE, - IIDC_EVENT_AFTER_TC_CHANGE, - IIDC_EVENT_CRIT_ERR, - IIDC_EVENT_NBITS /* must be last */ -}; - -enum iidc_reset_type { - IIDC_PFR, - IIDC_CORER, - IIDC_GLOBR, -}; - -enum iidc_rdma_protocol { - IIDC_RDMA_PROTOCOL_IWARP = BIT(0), - IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), -}; - -#define IIDC_MAX_USER_PRIORITY 8 -#define IIDC_MAX_DSCP_MAPPING 64 -#define IIDC_DSCP_PFC_MODE 0x1 - -/* Struct to hold per RDMA Qset info */ -struct iidc_rdma_qset_params { - /* Qset TEID returned to the RDMA driver in - * ice_add_rdma_qset and used by RDMA driver - * for calls to ice_del_rdma_qset - */ - u32 teid; /* Qset TEID */ - u16 qs_handle; /* RDMA driver provides this */ - u16 vport_id; /* VSI index */ - u8 tc; /* TC branch the Qset should belong to */ -}; - -struct iidc_qos_info { - u64 tc_ctx; - u8 rel_bw; - u8 prio_type; - u8 egress_virt_up; - u8 ingress_virt_up; -}; - -/* Struct to pass QoS info */ -struct iidc_qos_params { - struct iidc_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; - u8 up2tc[IIDC_MAX_USER_PRIORITY]; - u8 vport_relative_bw; - u8 vport_priority_type; - u8 num_tc; - u8 pfc_mode; - u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; -}; - -struct iidc_event { - DECLARE_BITMAP(type, IIDC_EVENT_NBITS); - u32 reg; -}; - -struct ice_pf; - -int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset); -int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type); -int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable); -void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos); - -/* Structure representing auxiliary driver tailored information about the core - * PCI dev, each auxiliary driver using the IIDC interface will have an - * instance of this struct dedicated to it. - */ - -struct iidc_auxiliary_dev { - struct auxiliary_device adev; - struct ice_pf *pf; -}; - -/* structure representing the auxiliary driver. This struct is to be - * allocated and populated by the auxiliary driver's owner. The core PCI - * driver will access these ops by performing a container_of on the - * auxiliary_device->dev.driver. - */ -struct iidc_auxiliary_drv { - struct auxiliary_driver adrv; - /* This event_handler is meant to be a blocking call. For instance, - * when a BEFORE_MTU_CHANGE event comes in, the event_handler will not - * return until the auxiliary driver is ready for the MTU change to - * happen. - */ - void (*event_handler)(struct ice_pf *pf, struct iidc_event *event); -}; - -#endif /* _IIDC_H_*/ diff --git a/include/linux/net/intel/iidc_rdma.h b/include/linux/net/intel/iidc_rdma.h new file mode 100644 index 000000000000..8baad1082042 --- /dev/null +++ b/include/linux/net/intel/iidc_rdma.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021-2025, Intel Corporation. */ + +#ifndef _IIDC_RDMA_H_ +#define _IIDC_RDMA_H_ + +#include <linux/auxiliary_bus.h> +#include <linux/device.h> +#include <linux/if_ether.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <net/dscp.h> + +enum iidc_rdma_event_type { + IIDC_RDMA_EVENT_BEFORE_MTU_CHANGE, + IIDC_RDMA_EVENT_AFTER_MTU_CHANGE, + IIDC_RDMA_EVENT_BEFORE_TC_CHANGE, + IIDC_RDMA_EVENT_AFTER_TC_CHANGE, + IIDC_RDMA_EVENT_WARN_RESET, + IIDC_RDMA_EVENT_CRIT_ERR, + IIDC_RDMA_EVENT_NBITS /* must be last */ +}; + +struct iidc_rdma_event { + DECLARE_BITMAP(type, IIDC_RDMA_EVENT_NBITS); + u32 reg; +}; + +enum iidc_rdma_reset_type { + IIDC_FUNC_RESET, + IIDC_DEV_RESET, +}; + +enum iidc_rdma_protocol { + IIDC_RDMA_PROTOCOL_IWARP = BIT(0), + IIDC_RDMA_PROTOCOL_ROCEV2 = BIT(1), +}; + +/* Structure to be populated by core LAN PCI driver */ +struct iidc_rdma_core_dev_info { + struct pci_dev *pdev; /* PCI device of corresponding to main function */ + struct auxiliary_device *adev; + /* Current active RDMA protocol */ + enum iidc_rdma_protocol rdma_protocol; + void *iidc_priv; /* elements unique to each driver */ +}; + +/* Structure representing auxiliary driver tailored information about the core + * PCI dev, each auxiliary driver using the IIDC interface will have an + * instance of this struct dedicated to it. + */ +struct iidc_rdma_core_auxiliary_dev { + struct auxiliary_device adev; + struct iidc_rdma_core_dev_info *cdev_info; +}; + +/* structure representing the auxiliary driver. This struct is to be + * allocated and populated by the auxiliary driver's owner. The core PCI + * driver will access these ops by performing a container_of on the + * auxiliary_device->dev.driver. + */ +struct iidc_rdma_core_auxiliary_drv { + struct auxiliary_driver adrv; + void (*event_handler)(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_event *event); +}; + +#endif /* _IIDC_RDMA_H_*/ diff --git a/include/linux/net/intel/iidc_rdma_ice.h b/include/linux/net/intel/iidc_rdma_ice.h new file mode 100644 index 000000000000..b40eed0e13fe --- /dev/null +++ b/include/linux/net/intel/iidc_rdma_ice.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2021-2025, Intel Corporation. */ + +#ifndef _IIDC_RDMA_ICE_H_ +#define _IIDC_RDMA_ICE_H_ + +#include <linux/dcbnl.h> + +#define IIDC_MAX_USER_PRIORITY 8 +#define IIDC_DSCP_PFC_MODE 0x1 + +/** + * struct iidc_rdma_qset_params - Struct to hold per RDMA Qset info + * @teid: TEID of the Qset node + * @qs_handle: SW index of the Qset, RDMA provides this + * @vport_id: VSI index + * @tc: Traffic Class branch the QSet should belong to + */ +struct iidc_rdma_qset_params { + /* Qset TEID returned to the RDMA driver in + * ice_add_rdma_qset and used by RDMA driver + * for calls to ice_del_rdma_qset + */ + u32 teid; + u16 qs_handle; + u16 vport_id; + u8 tc; +}; + +struct iidc_rdma_qos_info { + u64 tc_ctx; + u8 rel_bw; + u8 prio_type; + u8 egress_virt_up; + u8 ingress_virt_up; +}; + +/* Struct to pass QoS info */ +struct iidc_rdma_qos_params { + struct iidc_rdma_qos_info tc_info[IEEE_8021QAZ_MAX_TCS]; + u8 up2tc[IIDC_MAX_USER_PRIORITY]; + u8 vport_relative_bw; + u8 vport_priority_type; + u8 num_tc; + u8 pfc_mode; + u8 dscp_map[DSCP_MAX]; +}; + +struct iidc_rdma_priv_dev_info { + u8 pf_id; + u16 vport_id; + struct net_device *netdev; + struct iidc_rdma_qos_params qos_info; + u8 __iomem *hw_addr; +}; + +int ice_add_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset); +int ice_del_rdma_qset(struct iidc_rdma_core_dev_info *cdev, + struct iidc_rdma_qset_params *qset); +int ice_rdma_request_reset(struct iidc_rdma_core_dev_info *cdev, + enum iidc_rdma_reset_type reset_type); +int ice_rdma_update_vsi_filter(struct iidc_rdma_core_dev_info *cdev, u16 vsi_id, + bool enable); +int ice_alloc_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry); +void ice_free_rdma_qvector(struct iidc_rdma_core_dev_info *cdev, + struct msix_entry *entry); + +#endif /* _IIDC_RDMA_ICE_H_*/ diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 662074b08c94..f4936d9c2b3c 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -4,6 +4,7 @@ #define _LINUX_NET_TIMESTAMPING_H_ #include <uapi/linux/net_tstamp.h> +#include <uapi/linux/ethtool_netlink_generated.h> #define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ SOF_TIMESTAMPING_TX_SOFTWARE | \ @@ -13,10 +14,31 @@ SOF_TIMESTAMPING_TX_HARDWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) -enum hwtstamp_source { - HWTSTAMP_SOURCE_UNSPEC, - HWTSTAMP_SOURCE_NETDEV, - HWTSTAMP_SOURCE_PHYLIB, +/** + * struct hwtstamp_provider_desc - hwtstamp provider description + * + * @index: index of the hwtstamp provider. + * @qualifier: hwtstamp provider qualifier. + */ +struct hwtstamp_provider_desc { + int index; + enum hwtstamp_provider_qualifier qualifier; +}; + +/** + * struct hwtstamp_provider - hwtstamp provider object + * + * @rcu_head: RCU callback used to free the struct. + * @source: source of the hwtstamp provider. + * @phydev: pointer of the phydev source in case a PTP coming from phylib + * @desc: hwtstamp provider description. + */ + +struct hwtstamp_provider { + struct rcu_head rcu_head; + enum hwtstamp_source source; + struct phy_device *phydev; + struct hwtstamp_provider_desc desc; }; /** @@ -31,6 +53,7 @@ enum hwtstamp_source { * copied the ioctl request back to user space * @source: indication whether timestamps should come from the netdev or from * an attached phylib PHY + * @qualifier: qualifier of the hwtstamp provider * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -43,6 +66,7 @@ struct kernel_hwtstamp_config { struct ifreq *ifr; bool copied_to_user; enum hwtstamp_source source; + enum hwtstamp_provider_qualifier qualifier; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 11be70a7929f..7a01c518e573 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -53,12 +53,12 @@ enum { NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ NETIF_F_GSO_UDP_L4_BIT, /* ... UDP payload GSO (not UFO) */ NETIF_F_GSO_FRAGLIST_BIT, /* ... Fraglist GSO */ + NETIF_F_GSO_ACCECN_BIT, /* TCP AccECN w/ TSO (no clear CWR) */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_FRAGLIST_BIT, + NETIF_F_GSO_ACCECN_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ - __UNUSED_NETIF_F_37, NETIF_F_NTUPLE_BIT, /* N-tuple filters supported */ NETIF_F_RXHASH_BIT, /* Receive hashing offload */ NETIF_F_RXCSUM_BIT, /* Receive checksumming offload */ @@ -128,6 +128,7 @@ enum { #define NETIF_F_SG __NETIF_F(SG) #define NETIF_F_TSO6 __NETIF_F(TSO6) #define NETIF_F_TSO_ECN __NETIF_F(TSO_ECN) +#define NETIF_F_GSO_ACCECN __NETIF_F(GSO_ACCECN) #define NETIF_F_TSO __NETIF_F(TSO) #define NETIF_F_VLAN_CHALLENGED __NETIF_F(VLAN_CHALLENGED) #define NETIF_F_RXFCS __NETIF_F(RXFCS) @@ -210,7 +211,8 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) NETIF_F_TSO_ECN | NETIF_F_TSO_MANGLEID) /* List of features with software fallbacks. */ -#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_GSO_SCTP | \ +#define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | \ + NETIF_F_GSO_ACCECN | NETIF_F_GSO_SCTP | \ NETIF_F_GSO_UDP_L4 | NETIF_F_GSO_FRAGLIST) /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bb71ad82b42b..adb14db25798 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct dsa_port; struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; +struct netdev_config; struct netdev_name_node; struct sd_flow_limit; struct sfp_bus; @@ -82,6 +83,7 @@ struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; struct phy_link_topology; +struct hwtstamp_provider; typedef u32 xdp_features_t; @@ -338,11 +340,27 @@ struct gro_list { }; /* - * size of gro hash buckets, must less than bit number of - * napi_struct::gro_bitmask + * size of gro hash buckets, must be <= the number of bits in + * gro_node::bitmask */ #define GRO_HASH_BUCKETS 8 +/** + * struct gro_node - structure to support Generic Receive Offload + * @bitmask: bitmask to indicate used buckets in @hash + * @hash: hashtable of pending aggregated skbs, separated by flows + * @rx_list: list of pending ``GRO_NORMAL`` skbs + * @rx_count: cached current length of @rx_list + * @cached_napi_id: napi_struct::napi_id cached for hotpath, 0 for standalone + */ +struct gro_node { + unsigned long bitmask; + struct gro_list hash[GRO_HASH_BUCKETS]; + struct list_head rx_list; + u32 rx_count; + u32 cached_napi_id; +}; + /* * Structure for per-NAPI config */ @@ -350,6 +368,7 @@ struct napi_config { u64 gro_flush_timeout; u64 irq_suspend_timeout; u32 defer_hard_irqs; + cpumask_t affinity_mask; unsigned int napi_id; }; @@ -368,7 +387,6 @@ struct napi_struct { unsigned long state; int weight; u32 defer_hard_irqs_count; - unsigned long gro_bitmask; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL /* CPU actively polling if netpoll is configured */ @@ -377,20 +395,21 @@ struct napi_struct { /* CPU on which NAPI has been scheduled for processing */ int list_owner; struct net_device *dev; - struct gro_list gro_hash[GRO_HASH_BUCKETS]; struct sk_buff *skb; - struct list_head rx_list; /* Pending GRO_NORMAL skbs */ - int rx_count; /* length of rx_list */ - unsigned int napi_id; + struct gro_node gro; struct hrtimer timer; + /* all fields past this point are write-protected by netdev_lock */ struct task_struct *thread; unsigned long gro_flush_timeout; unsigned long irq_suspend_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ + u32 napi_id; struct list_head dev_list; struct hlist_node napi_hash_node; int irq; + struct irq_affinity_notify notify; + int napi_rmap_idx; int index; struct napi_config *config; }; @@ -406,6 +425,7 @@ enum { NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ + NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */ }; enum { @@ -419,6 +439,7 @@ enum { NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), + NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER), }; enum gro_result { @@ -509,7 +530,7 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) * is scheduled for example in the context of delayed timer * that can be skipped if a NAPI is already scheduled. * - * Return True if NAPI is scheduled, False otherwise. + * Return: True if NAPI is scheduled, False otherwise. */ static inline bool napi_is_scheduled(struct napi_struct *n) { @@ -524,7 +545,7 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. - * Return true if we schedule a NAPI or false if not. + * Return: true if we schedule a NAPI or false if not. * Refer to napi_schedule_prep() for additional reason on why * a NAPI might not be scheduled. */ @@ -558,7 +579,7 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) * Mark NAPI processing as complete. Should only be called if poll budget * has not been completely consumed. * Prefer over napi_complete(). - * Return false if device should avoid rearming interrupts. + * Return: false if device should avoid rearming interrupts. */ bool napi_complete_done(struct napi_struct *n, int work_done); @@ -569,16 +590,11 @@ static inline bool napi_complete(struct napi_struct *n) int dev_set_threaded(struct net_device *dev, bool threaded); -/** - * napi_disable - prevent NAPI from scheduling - * @n: NAPI context - * - * Stop NAPI from being scheduled on this context. - * Waits till any outstanding processing completes. - */ void napi_disable(struct napi_struct *n); +void napi_disable_locked(struct napi_struct *n); void napi_enable(struct napi_struct *n); +void napi_enable_locked(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running @@ -660,6 +676,7 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; + const struct attribute_group **groups; #endif unsigned long tx_maxrate; /* @@ -671,6 +688,7 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; #ifdef CONFIG_XDP_SOCKETS + /* "ops protected", see comment about net_device::lock */ struct xsk_buff_pool *pool; #endif @@ -693,7 +711,7 @@ struct netdev_queue { * slow- / control-path part */ /* NAPI instance for the queue - * Readers and writers must hold RTNL + * "ops protected", see comment about net_device::lock */ struct napi_struct *napi; @@ -995,9 +1013,13 @@ struct netdev_bpf { #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { - int (*xdo_dev_state_add) (struct xfrm_state *x, struct netlink_ext_ack *extack); - void (*xdo_dev_state_delete) (struct xfrm_state *x); - void (*xdo_dev_state_free) (struct xfrm_state *x); + int (*xdo_dev_state_add)(struct net_device *dev, + struct xfrm_state *x, + struct netlink_ext_ack *extack); + void (*xdo_dev_state_delete)(struct net_device *dev, + struct xfrm_state *x); + void (*xdo_dev_state_free)(struct net_device *dev, + struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); @@ -1087,8 +1109,8 @@ struct netdev_net_notifier { * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Old-style ioctl entry point. This is used internally by the - * appletalk and ieee802154 subsystems but is no longer called by - * the device ioctl handler. + * ieee802154 subsystem but is no longer called by the device + * ioctl handler. * * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); * Used by the bonding driver for its device specific ioctls: @@ -1754,6 +1776,7 @@ enum netdev_reg_state { * @lltx: device supports lockless Tx. Deprecated for real HW * drivers. Mainly used by logical interfaces, such as * bonding and tunnels + * @netmem_tx: device support netmem_tx. * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name @@ -1928,13 +1951,11 @@ enum netdev_reg_state { * * @reg_state: Register/unregister state machine * @dismantle: Device is going to be freed - * @rtnl_link_state: This enum represents the phases of creating - * a new link - * * @needs_free_netdev: Should unregister perform free_netdev? * @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside + * protected by @lock * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type @@ -1990,12 +2011,21 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ + * affinity. Set by netif_enable_irq_affinity(), then + * the driver must create a persistent napi by + * netif_napi_add_config() and finally bind the napi to + * IRQ (via netif_napi_set_irq()). + * + * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap. + * Set by calling netif_enable_cpu_rmap(). + * * @see_all_hwtstamp_requests: device wants to see calls to * ndo_hwtstamp_set() for all timestamp requests * regardless of source, even if those aren't * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN - * @netns_local: interface can't change network namespaces + * @netns_immutable: interface can't change network namespaces * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes * * @net_notifier_list: List of per-net netdev notifier block @@ -2045,6 +2075,7 @@ enum netdev_reg_state { * * @neighbours: List heads pointing to this device's neighbours' * dev_list, one per address-family. + * @hwprov: Tracks which PTP performs hardware packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2061,6 +2092,7 @@ struct net_device { struct_group(priv_flags_fast, unsigned long priv_flags:32; unsigned long lltx:1; + unsigned long netmem_tx:1; ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; @@ -2332,10 +2364,10 @@ struct net_device { bool dismantle; - enum { - RTNL_LINK_INITIALIZED, - RTNL_LINK_INITIALIZING, - } rtnl_link_state:16; + /** @moving_ns: device is changing netns, protected by @lock */ + bool moving_ns; + /** @rtnl_link_initializing: Device being created, suppress events */ + bool rtnl_link_initializing; bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); @@ -2396,11 +2428,13 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; + bool irq_affinity_auto; + bool rx_cpu_rmap_auto; /* priv_flags_slow, ungrouped to save space */ unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; - unsigned long netns_local:1; + unsigned long netns_immutable:1; unsigned long fcoe_mtu:1; struct list_head net_notifier_list; @@ -2412,6 +2446,14 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + /** @cfg: net_device queue-related configuration */ + struct netdev_config *cfg; + /** + * @cfg_pending: same as @cfg but when device is being actively + * reconfigured includes any changes to the configuration + * requested by the user, but which may or may not be rejected. + */ + struct netdev_config *cfg_pending; struct ethtool_netdev_state *ethtool; /* protected by rtnl_lock */ @@ -2442,8 +2484,57 @@ struct net_device { u32 napi_defer_hard_irqs; /** - * @lock: protects @net_shaper_hierarchy, feel free to use for other - * netdev-scope protection. Ordering: take after rtnl_lock. + * @up: copy of @state's IFF_UP, but safe to read with just @lock. + * May report false negatives while the device is being opened + * or closed (@lock does not protect .ndo_open, or .ndo_close). + */ + bool up; + + /** + * @request_ops_lock: request the core to run all @netdev_ops and + * @ethtool_ops under the @lock. + */ + bool request_ops_lock; + + /** + * @lock: netdev-scope lock, protects a small selection of fields. + * Should always be taken using netdev_lock() / netdev_unlock() helpers. + * Drivers are free to use it for other protection. + * + * For the drivers that implement shaper or queue API, the scope + * of this lock is expanded to cover most ndo/queue/ethtool/sysfs + * operations. Drivers may opt-in to this behavior by setting + * @request_ops_lock. + * + * @lock protection mixes with rtnl_lock in multiple ways, fields are + * either: + * + * - simply protected by the instance @lock; + * + * - double protected - writers hold both locks, readers hold either; + * + * - ops protected - protected by the lock held around the NDOs + * and other callbacks, that is the instance lock on devices for + * which netdev_need_ops_lock() returns true, otherwise by rtnl_lock; + * + * - double ops protected - always protected by rtnl_lock but for + * devices for which netdev_need_ops_lock() returns true - also + * the instance lock. + * + * Simply protects: + * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, + * @net_shaper_hierarchy, @reg_state, @threaded + * + * Double protects: + * @up, @moving_ns, @nd_net, @xdp_features + * + * Double ops protects: + * @real_num_rx_queues, @real_num_tx_queues + * + * Also protects some fields in: + * struct napi_struct, struct netdev_queue, struct netdev_rx_queue + * + * Ordering: take after rtnl_lock. */ struct mutex lock; @@ -2457,6 +2548,8 @@ struct net_device { struct hlist_head neighbours[NEIGH_NR_TABLES]; + struct hwtstamp_provider __rcu *hwprov; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; @@ -2562,21 +2655,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev, f(dev, &dev->_tx[i], arg); } -#define netdev_lockdep_set_classes(dev) \ -{ \ - static struct lock_class_key qdisc_tx_busylock_key; \ - static struct lock_class_key qdisc_xmit_lock_key; \ - static struct lock_class_key dev_addr_list_lock_key; \ - unsigned int i; \ - \ - (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \ - lockdep_set_class(&(dev)->addr_list_lock, \ - &dev_addr_list_lock_key); \ - for (i = 0; i < (dev)->num_tx_queues; i++) \ - lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \ - &qdisc_xmit_lock_key); \ -} - u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, @@ -2673,9 +2751,24 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, enum netdev_queue_type type, struct napi_struct *napi); +static inline void netdev_lock(struct net_device *dev) +{ + mutex_lock(&dev->lock); +} + +static inline void netdev_unlock(struct net_device *dev) +{ + mutex_unlock(&dev->lock); +} +/* Additional netdev_lock()-related helpers are in net/netdev_lock.h */ + +void netif_napi_set_irq_locked(struct napi_struct *napi, int irq); + static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) { - napi->irq = irq; + netdev_lock(napi->dev); + netif_napi_set_irq_locked(napi, irq); + netdev_unlock(napi->dev); } /* Default NAPI poll() weight @@ -2683,8 +2776,19 @@ static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) */ #define NAPI_POLL_WEIGHT 64 -void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight); +void netif_napi_add_weight_locked(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight); + +static inline void +netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + netdev_lock(dev); + netif_napi_add_weight_locked(dev, napi, poll, weight); + netdev_unlock(dev); +} /** * netif_napi_add() - initialize a NAPI context @@ -2703,6 +2807,13 @@ netif_napi_add(struct net_device *dev, struct napi_struct *napi, } static inline void +netif_napi_add_locked(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int)) +{ + netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); +} + +static inline void netif_napi_add_tx_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), @@ -2712,6 +2823,15 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } +static inline void +netif_napi_add_config_locked(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int index) +{ + napi->index = index; + napi->config = &dev->napi_config[index]; + netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); +} + /** * netif_napi_add_config - initialize a NAPI context with persistent config * @dev: network device @@ -2723,9 +2843,9 @@ static inline void netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int index) { - napi->index = index; - napi->config = &dev->napi_config[index]; - netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); + netdev_lock(dev); + netif_napi_add_config_locked(dev, napi, poll, index); + netdev_unlock(dev); } /** @@ -2745,6 +2865,8 @@ static inline void netif_napi_add_tx(struct net_device *dev, netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } +void __netif_napi_del_locked(struct napi_struct *napi); + /** * __netif_napi_del - remove a NAPI context * @napi: NAPI context @@ -2753,7 +2875,18 @@ static inline void netif_napi_add_tx(struct net_device *dev, * containing @napi. Drivers might want to call this helper to combine * all the needed RCU grace periods into a single one. */ -void __netif_napi_del(struct napi_struct *napi); +static inline void __netif_napi_del(struct napi_struct *napi) +{ + netdev_lock(napi->dev); + __netif_napi_del_locked(napi); + netdev_unlock(napi->dev); +} + +static inline void netif_napi_del_locked(struct napi_struct *napi) +{ + __netif_napi_del_locked(napi); + synchronize_net(); +} /** * netif_napi_del - remove a NAPI context @@ -2767,6 +2900,9 @@ static inline void netif_napi_del(struct napi_struct *napi) synchronize_net(); } +int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs); +void netif_set_affinity_auto(struct net_device *dev); + struct packet_type { __be16 type; /* This is really htons(ether_type). */ bool ignore_outgoing; @@ -2813,9 +2949,9 @@ struct pcpu_sw_netstats { struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; - u64_stats_t rx_drops; u64_stats_t tx_packets; u64_stats_t tx_bytes; + u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); @@ -2860,6 +2996,46 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) u64_stats_update_end(&lstats->syncp); } +static inline void dev_dstats_rx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_packets); + u64_stats_add(&dstats->rx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_rx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_drops); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_packets); + u64_stats_add(&dstats->tx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_drops); + u64_stats_update_end(&dstats->syncp); +} + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ @@ -3096,7 +3272,7 @@ int call_netdevice_notifiers_info(unsigned long val, #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ - for_each_netdev_rcu(&init_net, slave) \ + for_each_netdev_rcu(dev_net_rcu(bond), slave) \ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) @@ -3160,9 +3336,12 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); +int netif_open(struct net_device *dev, struct netlink_ext_ack *extack); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); +void netif_close(struct net_device *dev); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); +void netif_disable_lro(struct net_device *dev); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, @@ -3202,7 +3381,6 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); -void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, @@ -3216,7 +3394,6 @@ struct net_device *netdev_get_by_index(struct net *net, int ifindex, struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); -struct net_device *dev_get_by_napi_id(unsigned int napi_id); void netdev_copy_name(struct net_device *dev, char *name); static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, @@ -3331,6 +3508,12 @@ struct softnet_data { DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +struct page_pool_bh { + struct page_pool *pool; + local_lock_t bh_lock; +}; +DECLARE_PER_CPU(struct page_pool_bh, system_page_pool); + #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { @@ -3818,7 +4001,7 @@ static inline bool netif_attr_test_mask(unsigned long j, * @online_mask: bitmask for CPUs/Rx queues that are online * @nr_bits: number of bits in the bitmask * - * Returns true if a CPU/Rx queue is online. + * Returns: true if a CPU/Rx queue is online. */ static inline bool netif_attr_test_online(unsigned long j, const unsigned long *online_mask, @@ -3838,7 +4021,8 @@ static inline bool netif_attr_test_online(unsigned long j, * @srcp: the cpumask/Rx queue mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set. + * Returns: next (after n) CPU/Rx queue index in the mask; + * >= nr_bits if no further CPUs/Rx queues set. */ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) @@ -3860,7 +4044,8 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, * @src2p: the second CPUs/Rx queues mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set in both. + * Returns: next (after n) CPU/Rx queue index set in both masks; + * >= nr_bits if no further CPUs/Rx queues set in both. */ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, @@ -3907,17 +4092,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev) } int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); - -#ifdef CONFIG_SYSFS int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); -#else -static inline int netif_set_real_num_rx_queues(struct net_device *dev, - unsigned int rxqs) -{ - dev->real_num_rx_queues = rxqs; - return 0; -} -#endif int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); @@ -3966,9 +4141,9 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog); -void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); + const struct bpf_prog *xdp_prog); +void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog); +int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); @@ -3976,10 +4151,15 @@ int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); void netif_receive_skb_list(struct list_head *head); -gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -void napi_gro_flush(struct napi_struct *napi, bool flush_old); +gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb); + +static inline gro_result_t napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb) +{ + return gro_receive_skb(&napi->gro, skb); +} + struct sk_buff *napi_get_frags(struct napi_struct *napi); -void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) @@ -4004,6 +4184,8 @@ int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); +int dev_eth_ioctl(struct net_device *dev, + struct ifreq *ifr, unsigned int cmd); int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg); int generic_hwtstamp_set_lower(struct net_device *dev, @@ -4013,25 +4195,28 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_change_flags(struct net_device *dev, unsigned int flags, + struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); +int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat, int new_ifindex); -static inline + const char *pat, int new_ifindex, + struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net *net, - const char *pat) -{ - return __dev_change_net_namespace(dev, net, pat, 0); -} + const char *pat); int __dev_set_mtu(struct net_device *, int); +int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); -int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, +int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, + struct netlink_ext_ack *extack); +int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); -int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, @@ -4044,7 +4229,9 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); +int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); +u8 dev_xdp_sb_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); u32 dev_get_min_mp_channel_count(const struct net_device *dev); @@ -4111,7 +4298,17 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, return 0; } -bool dev_nit_active(struct net_device *dev); +bool dev_nit_active_rcu(const struct net_device *dev); +static inline bool dev_nit_active(const struct net_device *dev) +{ + bool ret; + + rcu_read_lock(); + ret = dev_nit_active_rcu(dev); + rcu_read_unlock(); + return ret; +} + void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); static inline void __dev_put(struct net_device *dev) @@ -4242,6 +4439,7 @@ void linkwatch_fire_event(struct net_device *dev); * pending work list (if queued). */ void linkwatch_sync_dev(struct net_device *dev); +void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present @@ -4256,7 +4454,7 @@ static inline bool netif_carrier_ok(const struct net_device *dev) unsigned long dev_trans_start(struct net_device *dev); -void __netdev_watchdog_up(struct net_device *dev); +void netdev_watchdog_up(struct net_device *dev); void netif_carrier_on(struct net_device *dev); void netif_carrier_off(struct net_device *dev); @@ -4501,9 +4699,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) /* * txq->trans_start can be read locklessly from dev_watchdog() */ -static inline void txq_trans_update(struct netdev_queue *txq) +static inline void txq_trans_update(const struct net_device *dev, + struct netdev_queue *txq) { - if (txq->xmit_lock_owner != -1) + if (!dev->lltx) WRITE_ONCE(txq->trans_start, jiffies); } @@ -4650,6 +4849,9 @@ int devm_register_netdev(struct device *dev, struct net_device *ndev); /* General hardware address lists handling functions */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); +int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, + struct netdev_hw_addr_list *from_list, + int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, @@ -4781,8 +4983,11 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); +int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); +void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); @@ -5018,7 +5223,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) - txq_trans_update(txq); + txq_trans_update(dev, txq); return rc; } @@ -5100,6 +5305,8 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_TCP_ACCECN != + (NETIF_F_GSO_ACCECN >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h index 38325e070296..813a19122ebb 100644 --- a/include/linux/netdevice_xmit.h +++ b/include/linux/netdevice_xmit.h @@ -8,6 +8,12 @@ struct netdev_xmit { #ifdef CONFIG_NET_EGRESS u8 skip_txqueue; #endif +#if IS_ENABLED(CONFIG_NET_ACT_MIRRED) + u8 sched_mirred_nest; +#endif +#if IS_ENABLED(CONFIG_NF_DUP_NETDEV) + u8 nf_dup_skb_recursion; +#endif }; #endif diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2b8aac2c70ad..5f896fcc074d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -95,6 +95,9 @@ enum nf_hook_ops_type { }; struct nf_hook_ops { + struct list_head list; + struct rcu_head rcu; + /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; @@ -470,6 +473,7 @@ struct nf_ct_hook { void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); int (*confirm)(struct sk_buff *skb); + u32 (*get_id)(const struct nf_conntrack *nfct); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; @@ -498,17 +502,6 @@ extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; /* - * nf_skb_duplicated - TEE target has sent a packet - * - * When a xtables target sends a packet, the OUTPUT and POSTROUTING - * hooks are traversed again, i.e. nft and xtables are invoked recursively. - * - * This is used by xtables TEE target to prevent the duplicated skb from - * being duplicated again. - */ -DECLARE_PER_CPU(bool, nf_skb_duplicated); - -/* * Contains bitmask of ctnetlink event subscribers, if any. * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5897f3dbaf7c..f39f688d7285 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -357,7 +357,7 @@ extern struct static_key xt_tee_enabled; * Begin packet processing : all readers must wait the end * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) - * Returns : + * Returns: * 1 if no recursion on this cpu * 0 if recursion detected */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index 8676316547cc..3175073a66ba 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -66,7 +66,6 @@ static inline bool nf_hook_egress_active(void) * @rc: result code which shall be returned by __dev_queue_xmit() on failure * @dev: netdev whose egress hooks shall be applied to @skb * - * Returns @skb on success or %NULL if the packet was consumed or filtered. * Caller must hold rcu_read_lock. * * On ingress, packets are classified first by tc, then by netfilter. @@ -81,6 +80,8 @@ static inline bool nf_hook_egress_active(void) * called recursively by tunnel drivers such as vxlan, the flag is reverted to * false after sch_handle_egress(). This ensures that netfilter is applied * both on the overlay and underlying network. + * + * Returns: @skb on success or %NULL if the packet was consumed or filtered. */ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, struct net_device *dev) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ecdd5ced16a8..f43f075852c0 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -18,9 +18,11 @@ #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/uio.h> +#include <linux/rolling_buffer.h> enum netfs_sreq_ref_trace; typedef struct mempool_s mempool_t; +struct folio_queue; /** * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED] @@ -46,11 +48,9 @@ enum netfs_io_source { NETFS_INVALID_READ, NETFS_UPLOAD_TO_SERVER, NETFS_WRITE_TO_CACHE, - NETFS_INVALID_WRITE, } __mode(byte); -typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, - bool was_async); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error); /* * Per-inode context. This wraps the VFS inode. @@ -69,8 +69,8 @@ struct netfs_inode { unsigned long flags; #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ -#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ #define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ +#define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */ }; /* @@ -143,8 +143,8 @@ struct netfs_io_stream { struct netfs_io_subrequest *front; /* Op being collected */ unsigned long long collected_to; /* Position we've collected results to */ size_t transferred; /* The amount transferred from this stream */ - enum netfs_io_source source; /* Where to read from/write to */ unsigned short error; /* Aggregate error for the stream */ + enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* Index of stream in parent table */ bool avail; /* T if stream is available */ bool active; /* T if stream is active */ @@ -178,9 +178,6 @@ struct netfs_io_subrequest { unsigned long long start; /* Where to start the I/O */ size_t len; /* Size of the I/O */ size_t transferred; /* Amount of data transferred */ - size_t consumed; /* Amount of read data consumed */ - size_t prev_donated; /* Amount of data donated from previous subreq */ - size_t next_donated; /* Amount of data donated from next subreq */ refcount_t ref; short error; /* 0 or error that occurred */ unsigned short debug_index; /* Index in list (for debugging output) */ @@ -188,13 +185,9 @@ struct netfs_io_subrequest { u8 retry_count; /* The number of retries (0 on initial pass) */ enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* I/O stream this belongs to */ - unsigned char curr_folioq_slot; /* Folio currently being read */ - unsigned char curr_folio_order; /* Order of folio */ - struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_MADE_PROGRESS 4 /* Set if we transferred at least some data */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ @@ -208,9 +201,12 @@ enum netfs_io_origin { NETFS_READAHEAD, /* This read was triggered by readahead */ NETFS_READPAGE, /* This read is a synchronous read */ NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ + NETFS_READ_SINGLE, /* This read should be treated as a single object */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_UNBUFFERED_READ, /* This is an unbuffered read */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ + NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */ NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */ NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */ NETFS_DIO_WRITE, /* This is a direct I/O write */ @@ -224,66 +220,65 @@ enum netfs_io_origin { */ struct netfs_io_request { union { - struct work_struct work; + struct work_struct cleanup_work; /* Deferred cleanup work */ struct rcu_head rcu; }; + struct work_struct work; /* Result collector work */ struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; - struct readahead_control *ractl; /* Readahead descriptor */ + struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ +#ifdef CONFIG_PROC_FS struct list_head proc_link; /* Link in netfs_iorequests */ - struct list_head subrequests; /* Contributory I/O operations */ +#endif struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ - struct folio_queue *buffer; /* Head of I/O buffer */ - struct folio_queue *buffer_tail; /* Tail of I/O buffer */ - struct iov_iter iter; /* Unencrypted-side iterator */ - struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */ + struct rolling_buffer buffer; /* Unencrypted buffer */ +#define NETFS_ROLLBUF_PUT_MARK ROLLBUF_MARK_1 +#define NETFS_ROLLBUF_PAGECACHE_MARK ROLLBUF_MARK_2 + wait_queue_head_t waitq; /* Processor waiter */ void *netfs_priv; /* Private data for the netfs */ void *netfs_priv2; /* Private data for the netfs */ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ - unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ - unsigned int debug_id; - unsigned int rsize; /* Maximum read size (0 for none) */ - unsigned int wsize; /* Maximum write size (0 for none) */ - atomic_t subreq_counter; /* Next subreq->debug_index */ - unsigned int nr_group_rel; /* Number of refs to release on ->group */ - spinlock_t lock; /* Lock for queuing subreqs */ - atomic_t nr_outstanding; /* Number of ops in progress */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ long error; /* 0 or error that occurred */ - enum netfs_io_origin origin; /* Origin of the request */ - bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ - u8 buffer_head_slot; /* First slot in ->buffer */ - u8 buffer_tail_slot; /* Next slot in ->buffer_tail */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ unsigned long long collected_to; /* Point we've collected to */ unsigned long long cleaned_to; /* Position we've cleaned folios to */ + unsigned long long abandon_to; /* Position to abandon folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ - size_t prev_donated; /* Fallback for subreq->prev_donated */ + unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ + unsigned int debug_id; + unsigned int rsize; /* Maximum read size (0 for none) */ + unsigned int wsize; /* Maximum write size (0 for none) */ + atomic_t subreq_counter; /* Next subreq->debug_index */ + unsigned int nr_group_rel; /* Number of refs to release on ->group */ + spinlock_t lock; /* Lock for queuing subreqs */ + unsigned char front_folio_order; /* Order (size) of front folio */ + enum netfs_io_origin origin; /* Origin of the request */ + bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; -#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ -#define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ -#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ -#define NETFS_RREQ_BLOCKED 10 /* We blocked */ -#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ +#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */ +#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */ +#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */ +#define NETFS_RREQ_FAILED 3 /* The request failed */ +#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */ +#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */ +#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */ +#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */ +#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ -#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; - void (*cleanup)(struct netfs_io_request *req); }; /* @@ -322,7 +317,6 @@ struct netfs_request_ops { */ enum netfs_read_from_hole { NETFS_READ_HOLE_IGNORE, - NETFS_READ_HOLE_CLEAR, NETFS_READ_HOLE_FAIL, }; @@ -409,6 +403,13 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * struct netfs_group *netfs_group); ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from); +/* Single, monolithic object read/write API. */ +void netfs_single_mark_inode_dirty(struct inode *inode); +ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_iter *iter); +int netfs_writeback_single(struct address_space *mapping, + struct writeback_control *wbc, + struct iov_iter *iter); + /* Address operations API */ struct readahead_control; void netfs_readahead(struct readahead_control *); @@ -428,22 +429,19 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp); vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group); /* (Sub)request management API. */ -void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq, - bool was_async); -void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq, - int error, bool was_async); +void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq); +void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); + enum netfs_sreq_ref_trace what); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); void netfs_queue_write_request(struct netfs_io_subrequest *subreq); int netfs_start_io_read(struct inode *inode); @@ -453,6 +451,18 @@ void netfs_end_io_write(struct inode *inode); int netfs_start_io_direct(struct inode *inode); void netfs_end_io_direct(struct inode *inode); +/* Miscellaneous APIs. */ +struct folio_queue *netfs_folioq_alloc(unsigned int rreq_id, gfp_t gfp, + unsigned int trace /*enum netfs_folioq_trace*/); +void netfs_folioq_free(struct folio_queue *folioq, + unsigned int trace /*enum netfs_trace_folioq*/); + +/* Buffer wrangling helpers API. */ +int netfs_alloc_folioq_buffer(struct address_space *mapping, + struct folio_queue **_buffer, + size_t *_cur_size, ssize_t size, gfp_t gfp); +void netfs_free_folioq_buffer(struct folio_queue *fq); + /** * netfs_inode - Get the netfs inode context from the inode * @inode: The inode to query diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c3ae84a77e16..882e9c1b6c1d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -63,7 +63,7 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) } /* this can be increased when necessary - don't expose to userland */ -#define NETLINK_MAX_COOKIE_LEN 20 +#define NETLINK_MAX_COOKIE_LEN 8 #define NETLINK_MAX_FMTMSG_LEN 80 /** @@ -212,6 +212,7 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, { if (!extack) return; + BUILD_BUG_ON(sizeof(extack->cookie) < sizeof(cookie)); memcpy(extack->cookie, &cookie, sizeof(cookie)); extack->cookie_len = sizeof(cookie); } diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b34301650c47..0477208ed9ff 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -25,7 +25,13 @@ union inet_addr { struct netpoll { struct net_device *dev; netdevice_tracker dev_tracker; + /* + * Either dev_name or dev_mac can be used to specify the local + * interface - dev_name is used if it is a nonempty string, else + * dev_mac is used. + */ char dev_name[IFNAMSIZ]; + u8 dev_mac[ETH_ALEN]; const char *name; union inet_addr local_ip, remote_ip; @@ -33,6 +39,7 @@ struct netpoll { u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; struct sk_buff_head skb_pool; + struct work_struct refill_wq; }; struct netpoll_info { @@ -57,7 +64,7 @@ static inline void netpoll_poll_disable(struct net_device *dev) { return; } static inline void netpoll_poll_enable(struct net_device *dev) { return; } #endif -void netpoll_send_udp(struct netpoll *np, const char *msg, int len); +int netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 9ad727ddfedb..0906a0b40c6a 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -55,7 +55,6 @@ enum nfs3_stable_how { NFS_INVALID_STABLE_HOW = -1 }; -#ifdef CONFIG_CRC32 /** * nfs_fhandle_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -67,10 +66,4 @@ static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) { return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size); } -#else /* CONFIG_CRC32 */ -static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh) -{ - return 0; -} -#endif /* CONFIG_CRC32 */ #endif /* _LINUX_NFS_H */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 8d7430d9f218..e947af6a3684 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -17,6 +17,7 @@ #include <linux/uidgid.h> #include <uapi/linux/nfs4.h> #include <linux/sunrpc/msg_prot.h> +#include <linux/sunrpc/xdrgen/nfs4_1.h> enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, @@ -46,6 +47,7 @@ struct nfs4_acl { struct nfs4_label { uint32_t lfs; uint32_t pi; + u32 lsmid; u32 len; char *label; }; @@ -70,6 +72,7 @@ struct nfs4_stateid_struct { NFS4_LAYOUT_STATEID_TYPE, NFS4_PNFS_DS_STATEID_TYPE, NFS4_REVOKED_STATEID_TYPE, + NFS4_FREED_STATEID_TYPE, } type; }; @@ -298,6 +301,7 @@ enum nfsstat4 { /* error codes for internal client use */ #define NFS4ERR_RESET_TO_MDS 12001 #define NFS4ERR_RESET_TO_PNFS 12002 +#define NFS4ERR_FATAL_IOERROR 12003 static inline bool seqid_mutating_err(u32 err) { @@ -365,7 +369,7 @@ enum limit_by4 { NFS4_LIMIT_BLOCKS = 2 }; -enum open_delegation_type4 { +enum nfs4_open_delegation_type4 { NFS4_OPEN_DELEGATE_NONE = 0, NFS4_OPEN_DELEGATE_READ = 1, NFS4_OPEN_DELEGATE_WRITE = 2, @@ -512,12 +516,6 @@ enum { FATTR4_XATTR_SUPPORT = 82, }; -enum { - FATTR4_TIME_DELEG_ACCESS = 84, - FATTR4_TIME_DELEG_MODIFY = 85, - FATTR4_OPEN_ARGUMENTS = 86, -}; - /* * The following internal definitions enable processing the above * attribute bits within 32-bit word boundaries. @@ -681,6 +679,7 @@ enum { NFSPROC4_CLNT_SEEK, NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, + NFSPROC4_CLNT_ZERO_RANGE, NFSPROC4_CLNT_LAYOUTSTATS, NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, @@ -695,6 +694,7 @@ enum { NFSPROC4_CLNT_LISTXATTRS, NFSPROC4_CLNT_REMOVEXATTR, NFSPROC4_CLNT_READ_PLUS, + NFSPROC4_CLNT_OFFLOAD_STATUS, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 039898d70954..67ae2c3f41d2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,23 @@ struct nfs_lock_context { struct rcu_head rcu_head; }; +struct nfs_file_localio { + struct nfsd_file __rcu *ro_file; + struct nfsd_file __rcu *rw_file; + struct list_head list; + void __rcu *nfs_uuid; /* opaque pointer to 'nfs_uuid_t' */ +}; + +static inline void nfs_localio_file_init(struct nfs_file_localio *nfl) +{ +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + nfl->ro_file = NULL; + nfl->rw_file = NULL; + INIT_LIST_HEAD(&nfl->list); + nfl->nfs_uuid = NULL; +#endif +} + struct nfs4_state; struct nfs_open_context { struct nfs_lock_context lock_context; @@ -87,15 +104,16 @@ struct nfs_open_context { struct nfs4_state *state; fmode_t mode; + int error; unsigned long flags; #define NFS_CONTEXT_BAD (2) #define NFS_CONTEXT_UNLOCK (3) #define NFS_CONTEXT_FILE_OPEN (4) - int error; - struct list_head list; struct nfs4_threshold *mdsthreshold; + struct list_head list; struct rcu_head rcu_head; + struct nfs_file_localio nfl; }; struct nfs_open_dir_context { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b804346a9741..63141320c2a8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -50,7 +50,7 @@ struct nfs_client { #define NFS_CS_DS 7 /* - Server is a DS */ #define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ #define NFS_CS_PNFS 9 /* - Server used for pnfs */ -#define NFS_CS_LOCAL_IO 10 /* - client is local */ +#define NFS_CS_NETUNREACH_FATAL 10 /* - ENETUNREACH errors are fatal */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -125,6 +125,7 @@ struct nfs_client { */ char cl_ipaddr[48]; struct net *cl_net; + netns_tracker cl_ns_tracker; struct list_head pending_cb_stateids; struct rcu_head rcu; @@ -132,7 +133,7 @@ struct nfs_client { struct timespec64 cl_nfssvc_boot; seqlock_t cl_boot_lock; nfs_uuid_t cl_uuid; - spinlock_t cl_localio_lock; + struct work_struct cl_local_probe_work; #endif /* CONFIG_NFS_LOCALIO */ }; @@ -168,6 +169,8 @@ struct nfs_server { #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 #define NFS_MOUNT_SHUTDOWN 0x08000000 #define NFS_MOUNT_NO_ALIGNWRITE 0x10000000 +#define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000 +#define NFS_MOUNT_NETUNREACH_FATAL 0x40000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ @@ -211,6 +214,15 @@ struct nfs_server { char *fscache_uniq; /* Uniquifier (or NULL) */ #endif + /* The following #defines numerically match the NFSv4 equivalents */ +#define NFS_FH_NOEXPIRE_WITH_OPEN (0x1) +#define NFS_FH_VOLATILE_ANY (0x2) +#define NFS_FH_VOL_MIGRATION (0x4) +#define NFS_FH_VOL_RENAME (0x8) +#define NFS_FH_RENAME_UNSAFE (NFS_FH_VOLATILE_ANY | NFS_FH_VOL_RENAME) + u32 fh_expire_type; /* V4 bitmask representing file + handle volatility type for + this filesystem */ u32 pnfs_blksize; /* layout_blksize attr */ #if IS_ENABLED(CONFIG_NFS_V4) u32 attr_bitmask[3];/* V4 bitmask representing the set @@ -234,9 +246,6 @@ struct nfs_server { u32 acl_bitmask; /* V4 bitmask representing the ACEs that are supported on this filesystem */ - u32 fh_expire_type; /* V4 bitmask representing file - handle volatility type for - this filesystem */ struct pnfs_layoutdriver_type *pnfs_curr_ld; /* Active layout driver */ struct rpc_wait_queue roc_rpcwaitq; void *pnfs_ld_data; /* per mount point data */ @@ -251,6 +260,10 @@ struct nfs_server { struct list_head ss_copies; struct list_head ss_src_copies; + unsigned long delegation_flags; +#define NFS4SERV_DELEGRETURN (1) +#define NFS4SERV_DELEGATION_EXPIRED (2) +#define NFS4SERV_DELEGRETURN_DELAYED (3) unsigned long delegation_gen; unsigned long mig_gen; unsigned long mig_status; @@ -290,6 +303,8 @@ struct nfs_server { #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) +#define NFS_CAP_OFFLOAD_STATUS (1U << 9) +#define NFS_CAP_ZERO_RANGE (1U << 10) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 559273a0f16d..67f6632f723b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1315,11 +1315,6 @@ struct nfs4_fsid_present_res { #endif /* CONFIG_NFS_V4 */ -struct nfstime4 { - u64 seconds; - u32 nseconds; -}; - #ifdef CONFIG_NFS_V4_1 struct pnfs_commit_bucket { @@ -1520,8 +1515,9 @@ struct nfs42_offload_status_args { struct nfs42_offload_status_res { struct nfs4_sequence_res osr_seq_res; - uint64_t osr_count; - int osr_status; + u64 osr_count; + int complete_count; + u32 osr_complete; }; struct nfs42_copy_notify_args { @@ -1637,6 +1633,7 @@ enum { NFS_IOHDR_RESEND_PNFS, NFS_IOHDR_RESEND_MDS, NFS_IOHDR_UNSTABLE_WRITES, + NFS_IOHDR_ODIRECT, }; struct nfs_io_completion; @@ -1785,7 +1782,7 @@ struct nfs_rpc_ops { struct nfs_fattr *, struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct inode *, struct dentry *, + int (*lookup) (struct inode *, struct dentry *, const struct qstr *, struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *); @@ -1806,7 +1803,7 @@ struct nfs_rpc_ops { int (*link) (struct inode *, struct inode *, const struct qstr *); int (*symlink) (struct inode *, struct dentry *, struct folio *, unsigned int, struct iattr *); - int (*mkdir) (struct inode *, struct dentry *, struct iattr *); + struct dentry *(*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, const struct qstr *); int (*readdir) (struct nfs_readdir_arg *, struct nfs_readdir_res *); int (*mknod) (struct inode *, struct dentry *, struct iattr *, diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index 9202f4b24343..5c7c92659e73 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -6,9 +6,6 @@ #ifndef __LINUX_NFSLOCALIO_H #define __LINUX_NFSLOCALIO_H -/* nfsd_file structure is purposely kept opaque to NFS client */ -struct nfsd_file; - #if IS_ENABLED(CONFIG_NFS_LOCALIO) #include <linux/module.h> @@ -19,6 +16,9 @@ struct nfsd_file; #include <linux/nfs.h> #include <net/net_namespace.h> +struct nfs_client; +struct nfs_file_localio; + /* * Useful to allow a client to negotiate if localio * possible with its server. @@ -27,35 +27,43 @@ struct nfsd_file; */ typedef struct { uuid_t uuid; + unsigned nfs3_localio_probe_count; + /* this struct is over a cacheline, avoid bouncing */ + spinlock_t ____cacheline_aligned lock; struct list_head list; + spinlock_t *list_lock; /* nn->local_clients_lock */ struct net __rcu *net; /* nfsd's network namespace */ struct auth_domain *dom; /* auth_domain for localio */ + /* Local files to close when net is shut down or exports change */ + struct list_head files; } nfs_uuid_t; void nfs_uuid_init(nfs_uuid_t *); bool nfs_uuid_begin(nfs_uuid_t *); void nfs_uuid_end(nfs_uuid_t *); -void nfs_uuid_is_local(const uuid_t *, struct list_head *, +void nfs_uuid_is_local(const uuid_t *, struct list_head *, spinlock_t *, struct net *, struct auth_domain *, struct module *); -void nfs_uuid_invalidate_clients(struct list_head *list); -void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid); + +void nfs_localio_enable_client(struct nfs_client *clp); +void nfs_localio_disable_client(struct nfs_client *clp); +void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, + spinlock_t *nn_local_clients_lock); /* localio needs to map filehandle -> struct nfsd_file */ -extern struct nfsd_file * -nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *, - const struct cred *, const struct nfs_fh *, - const fmode_t) __must_hold(rcu); +void nfs_close_local_fh(struct nfs_file_localio *); struct nfsd_localio_operations { - bool (*nfsd_serv_try_get)(struct net *); - void (*nfsd_serv_put)(struct net *); + bool (*nfsd_net_try_get)(struct net *); + void (*nfsd_net_put)(struct net *); struct nfsd_file *(*nfsd_open_local_fh)(struct net *, struct auth_domain *, struct rpc_clnt *, const struct cred *, const struct nfs_fh *, + struct nfsd_file __rcu **pnf, const fmode_t); - struct net *(*nfsd_file_put_local)(struct nfsd_file *); + struct net *(*nfsd_file_put_local)(struct nfsd_file __rcu **); + struct nfsd_file *(*nfsd_file_get_local)(struct nfsd_file *); struct file *(*nfsd_file_file)(struct nfsd_file *); } ____cacheline_aligned; @@ -64,39 +72,51 @@ extern const struct nfsd_localio_operations *nfs_to; struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *, struct rpc_clnt *, const struct cred *, - const struct nfs_fh *, const fmode_t); + const struct nfs_fh *, struct nfs_file_localio *, + struct nfsd_file __rcu **pnf, + const fmode_t); static inline void nfs_to_nfsd_net_put(struct net *net) { /* - * Once reference to nfsd_serv is dropped, NFSD could be - * unloaded, so ensure safe return from nfsd_file_put_local() - * by always taking RCU. + * Once reference to net (and associated nfsd_serv) is dropped, NFSD + * could be unloaded, so ensure safe return from nfsd_net_put() by + * always taking RCU. */ rcu_read_lock(); - nfs_to->nfsd_serv_put(net); + nfs_to->nfsd_net_put(net); rcu_read_unlock(); } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +static inline void nfs_to_nfsd_file_put_local(struct nfsd_file __rcu **localio) { /* - * Must not hold RCU otherwise nfsd_file_put() can easily trigger: - * "Voluntary context switch within RCU read-side critical section!" - * by scheduling deep in underlying filesystem (e.g. XFS). + * Either *localio must be guaranteed to be non-NULL, or caller + * must prevent nfsd shutdown from completing as nfs_close_local_fh() + * does by blocking the nfs_uuid from being finally put. */ - struct net *net = nfs_to->nfsd_file_put_local(localio); + struct net *net; + + net = nfs_to->nfsd_file_put_local(localio); - nfs_to_nfsd_net_put(net); + if (net) + nfs_to_nfsd_net_put(net); } #else /* CONFIG_NFS_LOCALIO */ + +struct nfs_file_localio; +static inline void nfs_close_local_fh(struct nfs_file_localio *nfl) +{ +} static inline void nfsd_localio_ops_init(void) { } -static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio) +struct nfs_client; +static inline void nfs_localio_disable_client(struct nfs_client *clp) { } + #endif /* CONFIG_NFS_LOCALIO */ #endif /* __LINUX_NFSLOCALIO_H */ diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a8dfb38c9bb6..e78fa535f61d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -17,7 +17,6 @@ void lockup_detector_init(void); void lockup_detector_retry_init(void); void lockup_detector_soft_poweroff(void); -void lockup_detector_cleanup(void); extern int watchdog_user_enabled; extern int watchdog_thresh; @@ -37,7 +36,6 @@ extern int sysctl_hardlockup_all_cpu_backtrace; static inline void lockup_detector_init(void) { } static inline void lockup_detector_retry_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } -static inline void lockup_detector_cleanup(void) { } #endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -104,12 +102,10 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); -extern void hardlockup_detector_perf_cleanup(void); extern void hardlockup_config_perf_event(const char *str); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } -static inline void hardlockup_detector_perf_cleanup(void) { } static inline void hardlockup_config_perf_event(const char *str) { } #endif diff --git a/include/linux/node.h b/include/linux/node.h index 9a881c2208b3..2b7517892230 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -57,6 +57,11 @@ enum cache_write_policy { NODE_CACHE_WRITE_OTHER, }; +enum cache_mode { + NODE_CACHE_ADDR_MODE_RESERVED, + NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR, +}; + /** * struct node_cache_attrs - system memory caching attributes * @@ -65,6 +70,7 @@ enum cache_write_policy { * @size: Total size of cache in bytes * @line_size: Number of bytes fetched on a cache miss * @level: The cache hierarchy level + * @address_mode: The address mode */ struct node_cache_attrs { enum cache_indexing indexing; @@ -72,6 +78,7 @@ struct node_cache_attrs { u64 size; u16 line_size; u8 level; + u16 address_mode; }; #ifdef CONFIG_HMEM_REPORTING diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 9fd7a0ce9c1a..f08ae71585fa 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -39,9 +39,6 @@ * int nodes_full(mask) Is mask full (all bits sets)? * int nodes_weight(mask) Hamming weight - number of set bits * - * void nodes_shift_right(dst, src, n) Shift right - * void nodes_shift_left(dst, src, n) Shift left - * * unsigned int first_node(mask) Number lowest set bit, or MAX_NUMNODES * unsigend int next_node(node, mask) Next node past 'node', or MAX_NUMNODES * unsigned int next_node_in(node, mask) Next node past 'node', or wrap to first, @@ -94,7 +91,6 @@ #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> -#include <linux/numa.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; @@ -191,6 +187,13 @@ static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *s bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES) +static __always_inline void __nodes_copy(nodemask_t *dstp, + const nodemask_t *srcp, unsigned int nbits) +{ + bitmap_copy(dstp->bits, srcp->bits, nbits); +} + #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_complement(nodemask_t *dstp, @@ -241,22 +244,6 @@ static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int n return bitmap_weight(srcp->bits, nbits); } -#define nodes_shift_right(dst, src, n) \ - __nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES) -static __always_inline void __nodes_shift_right(nodemask_t *dstp, - const nodemask_t *srcp, int n, int nbits) -{ - bitmap_shift_right(dstp->bits, srcp->bits, n, nbits); -} - -#define nodes_shift_left(dst, src, n) \ - __nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES) -static __always_inline void __nodes_shift_left(nodemask_t *dstp, - const nodemask_t *srcp, int n, int nbits) -{ - bitmap_shift_left(dstp->bits, srcp->bits, n, nbits); -} - /* FIXME: better would be to fix all architectures to never return > MAX_NUMNODES, then the silly min_ts could be dropped. */ @@ -535,6 +522,7 @@ static __always_inline int node_random(const nodemask_t *maskp) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) +#define for_each_node_with_cpus(node) for_each_node_state(node, N_CPU) /* * For nodemask scratch area. diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h index 6b28d97ea6ed..f850a48742f1 100644 --- a/include/linux/nodemask_types.h +++ b/include/linux/nodemask_types.h @@ -3,7 +3,16 @@ #define __LINUX_NODEMASK_TYPES_H #include <linux/bitops.h> -#include <linux/numa.h> + +#ifdef CONFIG_NODES_SHIFT +#define NODES_SHIFT CONFIG_NODES_SHIFT +#else +#define NODES_SHIFT 0 +#endif + +#define MAX_NUMNODES (1 << NODES_SHIFT) + +#define NUMA_NO_NODE (-1) typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; diff --git a/include/linux/numa.h b/include/linux/numa.h index 3567e40329eb..e6baaf6051bc 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -3,16 +3,8 @@ #define _LINUX_NUMA_H #include <linux/init.h> #include <linux/types.h> +#include <linux/nodemask.h> -#ifdef CONFIG_NODES_SHIFT -#define NODES_SHIFT CONFIG_NODES_SHIFT -#else -#define NODES_SHIFT 0 -#endif - -#define MAX_NUMNODES (1 << NODES_SHIFT) - -#define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) static inline bool numa_valid_node(int nid) @@ -39,6 +31,8 @@ void __init alloc_offline_node_data(int nid); /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); +int nearest_node_nodemask(int node, nodemask_t *mask); + #ifndef memory_add_physaddr_to_nid int memory_add_physaddr_to_nid(u64 start); #endif @@ -55,6 +49,11 @@ static inline int numa_nearest_node(int node, unsigned int state) return NUMA_NO_NODE; } +static inline int nearest_node_nodemask(int node, nodemask_t *mask) +{ + return NUMA_NO_NODE; +} + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; diff --git a/include/linux/numa_memblks.h b/include/linux/numa_memblks.h index cfad6ce7e1bd..991076cba7c5 100644 --- a/include/linux/numa_memblks.h +++ b/include/linux/numa_memblks.h @@ -22,6 +22,7 @@ struct numa_meminfo { }; int __init numa_add_memblk(int nodeid, u64 start, u64 end); +int __init numa_add_reserved_memblk(int nid, u64 start, u64 end); void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); int __init numa_cleanup_meminfo(struct numa_meminfo *mi); @@ -29,7 +30,10 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi); int __init numa_memblks_init(int (*init_func)(void), bool memblock_force_top_down); +extern int numa_distance_cnt; + #ifdef CONFIG_NUMA_EMU +extern int emu_nid_to_phys[MAX_NUMNODES]; int numa_emu_cmdline(char *str); void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, unsigned int nr_emu_nids); diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index c1d0bc5d9624..60e069a6757f 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -40,5 +40,12 @@ int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, u8 *ctrl_key, size_t ctrl_key_len, u8 *sess_key, size_t sess_key_len); +int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, + u8 *c1, u8 *c2, size_t hash_len, + u8 **ret_psk, size_t *ret_len); +int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len, + char *subsysnqn, char *hostnqn, u8 **ret_digest); +int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len, + u8 *psk_digest, u8 **ret_psk); #endif /* _NVME_AUTH_H */ diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 19d2b256180f..ab8971afa973 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -6,15 +6,25 @@ #ifndef _NVME_KEYRING_H #define _NVME_KEYRING_H +#include <linux/key.h> + #if IS_ENABLED(CONFIG_NVME_KEYRING) +struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, const char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest); key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); struct key *nvme_tls_key_lookup(key_serial_t key_id); #else - +static inline struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest) +{ + return ERR_PTR(-ENOTSUPP); +} static inline key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn) { diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index e07e8978d691..e435250fcb4d 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -13,6 +13,8 @@ #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 #define NVME_TCP_MIN_MAXH2CDATA 4096 +#define NVME_TCP_MIN_C2HTERM_PLEN 24 +#define NVME_TCP_MAX_C2HTERM_PLEN 152 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 13377dde4527..b65a1b9f2116 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -64,6 +64,7 @@ enum { /* Transport Type codes for Discovery Log Page entry TRTYPE field */ enum { + NVMF_TRTYPE_PCI = 0, /* PCI */ NVMF_TRTYPE_RDMA = 1, /* RDMA */ NVMF_TRTYPE_FC = 2, /* Fibre Channel */ NVMF_TRTYPE_TCP = 3, /* TCP/IP */ @@ -198,28 +199,54 @@ enum { #define NVME_NVM_IOSQES 6 #define NVME_NVM_IOCQES 4 +/* + * Controller Configuration (CC) register (Offset 14h) + */ enum { + /* Enable (EN): bit 0 */ NVME_CC_ENABLE = 1 << 0, NVME_CC_EN_SHIFT = 0, + + /* Bits 03:01 are reserved (NVMe Base Specification rev 2.1) */ + + /* I/O Command Set Selected (CSS): bits 06:04 */ NVME_CC_CSS_SHIFT = 4, - NVME_CC_MPS_SHIFT = 7, - NVME_CC_AMS_SHIFT = 11, - NVME_CC_SHN_SHIFT = 14, - NVME_CC_IOSQES_SHIFT = 16, - NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT, - NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, + + /* Memory Page Size (MPS): bits 10:07 */ + NVME_CC_MPS_SHIFT = 7, + NVME_CC_MPS_MASK = 0xf << NVME_CC_MPS_SHIFT, + + /* Arbitration Mechanism Selected (AMS): bits 13:11 */ + NVME_CC_AMS_SHIFT = 11, + NVME_CC_AMS_MASK = 7 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, + + /* Shutdown Notification (SHN): bits 15:14 */ + NVME_CC_SHN_SHIFT = 14, + NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, - NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, + + /* I/O Submission Queue Entry Size (IOSQES): bits 19:16 */ + NVME_CC_IOSQES_SHIFT = 16, + NVME_CC_IOSQES_MASK = 0xf << NVME_CC_IOSQES_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, + + /* I/O Completion Queue Entry Size (IOCQES): bits 23:20 */ + NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_IOCQES_MASK = 0xf << NVME_CC_IOCQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + + /* Controller Ready Independent of Media Enable (CRIME): bit 24 */ NVME_CC_CRIME = 1 << 24, + + /* Bits 25:31 are reserved (NVMe Base Specification rev 2.1) */ }; enum { @@ -275,6 +302,8 @@ enum nvme_ctrl_attr { NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), NVME_CTRL_ATTR_TBKAS = (1 << 6), NVME_CTRL_ATTR_ELBAS = (1 << 15), + NVME_CTRL_ATTR_RHII = (1 << 18), + NVME_CTRL_ATTR_FDPS = (1 << 19), }; struct nvme_id_ctrl { @@ -661,6 +690,44 @@ struct nvme_rotational_media_log { __u8 rsvd24[488]; }; +struct nvme_fdp_config { + __u8 flags; +#define FDPCFG_FDPE (1U << 0) + __u8 fdpcidx; + __le16 reserved; +}; + +struct nvme_fdp_ruh_desc { + __u8 ruht; + __u8 reserved[3]; +}; + +struct nvme_fdp_config_desc { + __le16 dsze; + __u8 fdpa; + __u8 vss; + __le32 nrg; + __le16 nruh; + __le16 maxpids; + __le32 nns; + __le64 runs; + __le32 erutl; + __u8 rsvd28[36]; + struct nvme_fdp_ruh_desc ruhs[]; +}; + +struct nvme_fdp_config_log { + __le16 numfdpc; + __u8 ver; + __u8 rsvd3; + __le32 sze; + __u8 rsvd8[8]; + /* + * This is followed by variable number of nvme_fdp_config_desc + * structures, but sparse doesn't like nested variable sized arrays. + */ +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; @@ -887,6 +954,7 @@ enum nvme_opcode { nvme_cmd_resv_register = 0x0d, nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, + nvme_cmd_io_mgmt_recv = 0x12, nvme_cmd_resv_release = 0x15, nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, @@ -908,6 +976,7 @@ enum nvme_opcode { nvme_opcode_name(nvme_cmd_resv_register), \ nvme_opcode_name(nvme_cmd_resv_report), \ nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_io_mgmt_recv), \ nvme_opcode_name(nvme_cmd_resv_release), \ nvme_opcode_name(nvme_cmd_zone_mgmt_send), \ nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \ @@ -1059,6 +1128,7 @@ enum { NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_DTYPE_STREAMS = 1 << 4, + NVME_RW_DTYPE_DPLCMT = 2 << 4, NVME_WZ_DEAC = 1 << 9, }; @@ -1146,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd { __le32 cdw14[2]; }; +struct nvme_io_mgmt_recv_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le64 rsvd2[2]; + union nvme_data_ptr dptr; + __u8 mo; + __u8 rsvd11; + __u16 mos; + __le32 numd; + __le32 cdw12[4]; +}; + +enum { + NVME_IO_MGMT_RECV_MO_RUHS = 1, +}; + +struct nvme_fdp_ruh_status_desc { + __le16 pid; + __le16 ruhid; + __le32 earutr; + __le64 ruamw; + __u8 reserved[16]; +}; + +struct nvme_fdp_ruh_status { + __u8 rsvd0[14]; + __le16 nruhsd; + struct nvme_fdp_ruh_status_desc ruhsd[]; +}; + enum { NVME_ZRA_ZONE_REPORT = 0, NVME_ZRASF_ZONE_REPORT_ALL = 0, @@ -1281,6 +1383,7 @@ enum { NVME_FEAT_PLM_WINDOW = 0x14, NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SANITIZE = 0x17, + NVME_FEAT_FDP = 0x1d, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1301,6 +1404,7 @@ enum { NVME_LOG_ANA = 0x0c, NVME_LOG_FEATURES = 0x12, NVME_LOG_RMI = 0x16, + NVME_LOG_FDP_CONFIGS = 0x20, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1744,6 +1848,13 @@ enum { NVME_AUTH_DHGROUP_INVALID = 0xff, }; +enum { + NVME_AUTH_SECP_NOSC = 0x00, + NVME_AUTH_SECP_SC = 0x01, + NVME_AUTH_SECP_NEWTLSPSK = 0x02, + NVME_AUTH_SECP_REPLACETLSPSK = 0x03, +}; + union nvmf_auth_protocol { struct nvmf_auth_dhchap_protocol_descriptor dhchap; }; @@ -1888,6 +1999,7 @@ struct nvme_command { struct nvmf_auth_receive_command auth_receive; struct nvme_dbbuf dbbuf; struct nvme_directive_cmd directive; + struct nvme_io_mgmt_recv_cmd imr; }; }; @@ -1896,6 +2008,46 @@ static inline bool nvme_is_fabrics(const struct nvme_command *cmd) return cmd->common.opcode == nvme_fabrics_command; } +#ifdef CONFIG_NVME_VERBOSE_ERRORS +const char *nvme_get_error_status_str(u16 status); +const char *nvme_get_opcode_str(u8 opcode); +const char *nvme_get_admin_opcode_str(u8 opcode); +const char *nvme_get_fabrics_opcode_str(u8 opcode); +#else /* CONFIG_NVME_VERBOSE_ERRORS */ +static inline const char *nvme_get_error_status_str(u16 status) +{ + return "I/O Error"; +} +static inline const char *nvme_get_opcode_str(u8 opcode) +{ + return "I/O Cmd"; +} +static inline const char *nvme_get_admin_opcode_str(u8 opcode) +{ + return "Admin Cmd"; +} + +static inline const char *nvme_get_fabrics_opcode_str(u8 opcode) +{ + return "Fabrics Cmd"; +} +#endif /* CONFIG_NVME_VERBOSE_ERRORS */ + +static inline const char *nvme_opcode_str(int qid, u8 opcode) +{ + return qid ? nvme_get_opcode_str(opcode) : + nvme_get_admin_opcode_str(opcode); +} + +static inline const char *nvme_fabrics_opcode_str( + int qid, const struct nvme_command *cmd) +{ + if (nvme_is_fabrics(cmd)) + return nvme_get_fabrics_opcode_str(cmd->fabrics.fctype); + + return nvme_opcode_str(qid, cmd->common.opcode); +} + struct nvme_error_slot { __le64 error_count; __le16 sqid; @@ -2019,7 +2171,7 @@ enum { NVME_SC_BAD_ATTRIBUTES = 0x180, NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, - NVME_SC_ONCS_NOT_SUPPORTED = 0x183, + NVME_SC_CMD_SIZE_LIM_EXCEEDED = 0x183, /* * I/O Command Set Specific - Fabrics commands: diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 3ebeaa0ded00..615a560d9edb 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -92,8 +92,8 @@ struct nvmem_cell_info { * @read_only: Device is read-only. * @root_only: Device is accessibly to root only. * @of_node: If given, this will be used instead of the parent's of_node. - * @reg_read: Callback to read data. - * @reg_write: Callback to write data. + * @reg_read: Callback to read data; return zero if successful. + * @reg_write: Callback to write data; return zero if successful. * @size: Device size. * @word_size: Minimum read/write access granularity. * @stride: Minimum read/write access stride. @@ -138,25 +138,6 @@ struct nvmem_config { }; /** - * struct nvmem_cell_table - NVMEM cell definitions for given provider - * - * @nvmem_name: Provider name. - * @cells: Array of cell definitions. - * @ncells: Number of cell definitions in the array. - * @node: List node. - * - * This structure together with related helper functions is provided for users - * that don't can't access the nvmem provided structure but wish to register - * cell definitions for it e.g. board files registering an EEPROM device. - */ -struct nvmem_cell_table { - const char *nvmem_name; - const struct nvmem_cell_info *cells; - size_t ncells; - struct list_head node; -}; - -/** * struct nvmem_layout - NVMEM layout definitions * * @dev: Device-model layout device. @@ -190,9 +171,6 @@ void nvmem_unregister(struct nvmem_device *nvmem); struct nvmem_device *devm_nvmem_register(struct device *dev, const struct nvmem_config *cfg); -void nvmem_add_cell_table(struct nvmem_cell_table *table); -void nvmem_del_cell_table(struct nvmem_cell_table *table); - int nvmem_add_one_cell(struct nvmem_device *nvmem, const struct nvmem_cell_info *info); @@ -223,8 +201,6 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c) return nvmem_register(c); } -static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {} -static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {} static inline int nvmem_add_one_cell(struct nvmem_device *nvmem, const struct nvmem_cell_info *info) { diff --git a/include/linux/objpool.h b/include/linux/objpool.h index cb1758eaa2d3..b713a1fe7521 100644 --- a/include/linux/objpool.h +++ b/include/linux/objpool.h @@ -170,17 +170,16 @@ static inline void *objpool_pop(struct objpool_head *pool) { void *obj = NULL; unsigned long flags; - int i, cpu; + int start, cpu; /* disable local irq to avoid preemption & interruption */ raw_local_irq_save(flags); - cpu = raw_smp_processor_id(); - for (i = 0; i < pool->nr_possible_cpus; i++) { + start = raw_smp_processor_id(); + for_each_possible_cpu_wrap(cpu, start) { obj = __objpool_try_get_slot(pool, cpu); if (obj) break; - cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1); } raw_local_irq_restore(flags); diff --git a/include/linux/objtool.h b/include/linux/objtool.h index b3b8d3dab52d..366ad004d794 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -45,35 +45,31 @@ #define STACK_FRAME_NON_STANDARD_FP(func) #endif -#define ANNOTATE_NOENDBR \ - "986: \n\t" \ - ".pushsection .discard.noendbr\n\t" \ - ".long 986b\n\t" \ - ".popsection\n\t" - #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ ".long 998b\n\t" \ ".popsection\n\t" -#else /* __ASSEMBLY__ */ +#define __ASM_BREF(label) label ## b -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. - */ -#define ANNOTATE_INTRA_FUNCTION_CALL \ - 999: \ - .pushsection .discard.intra_function_calls; \ - .long 999b; \ - .popsection; +#define __ASM_ANNOTATE(label, type) \ + ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ + ".long " __stringify(label) " - .\n\t" \ + ".long " __stringify(type) "\n\t" \ + ".popsection\n\t" + +#define ASM_ANNOTATE(type) \ + "911:\n\t" \ + __ASM_ANNOTATE(911b, type) + +#else /* __ASSEMBLY__ */ /* * In asm, there are two kinds of code: normal C-type callable functions and * the rest. The normal callable functions can be called by other code, and * don't do anything unusual with the stack. Such normal callable functions - * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this + * are annotated with SYM_FUNC_{START,END}. Most asm code falls in this * category. In this case, no special debugging annotations are needed because * objtool can automatically generate the ORC data for the ORC unwinder to read * at runtime. @@ -115,34 +111,11 @@ #endif .endm -.macro ANNOTATE_NOENDBR +.macro ANNOTATE type:req .Lhere_\@: - .pushsection .discard.noendbr - .long .Lhere_\@ - .popsection -.endm - -/* - * Use objtool to validate the entry requirement that all code paths do - * VALIDATE_UNRET_END before RET. - * - * NOTE: The macro must be used at the beginning of a global symbol, otherwise - * it will be ignored. - */ -.macro VALIDATE_UNRET_BEGIN -#if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) -.Lhere_\@: - .pushsection .discard.validate_unret + .pushsection .discard.annotate_insn,"M",@progbits,8 .long .Lhere_\@ - . - .popsection -#endif -.endm - -.macro REACHABLE -.Lhere_\@: - .pushsection .discard.reachable - .long .Lhere_\@ + .long \type .popsection .endm @@ -155,20 +128,79 @@ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) -#define ANNOTATE_NOENDBR -#define ASM_REACHABLE +#define __ASM_ANNOTATE(label, type) "" +#define ASM_ANNOTATE(type) #else -#define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -.macro ANNOTATE_NOENDBR -.endm -.macro REACHABLE +.macro ANNOTATE type:req .endm #endif #endif /* CONFIG_OBJTOOL */ +#ifndef __ASSEMBLY__ +/* + * Annotate away the various 'relocation to !ENDBR` complaints; knowing that + * these relocations will never be used for indirect calls. + */ +#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) +#define ANNOTATE_NOENDBR_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOENDBR)) + +/* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) +/* + * See linux/instrumentation.h + */ +#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN) +#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END) +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL ASM_ANNOTATE(ANNOTYPE_INTRA_FUNCTION_CALL) +/* + * Use objtool to validate the entry requirement that all code paths do + * VALIDATE_UNRET_END before RET. + * + * NOTE: The macro must be used at the beginning of a global symbol, otherwise + * it will be ignored. + */ +#define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) +/* + * This should be used to refer to an instruction that is considered + * terminating, like a noreturn CALL or UD2 when we know they are not -- eg + * WARN using UD2. + */ +#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) + +#else +#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR +#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE +/* ANNOTATE_INSTR_BEGIN ANNOTATE type=ANNOTYPE_INSTR_BEGIN */ +/* ANNOTATE_INSTR_END ANNOTATE type=ANNOTYPE_INSTR_END */ +#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS +#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL +#define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN +#define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#endif + +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) +#define VALIDATE_UNRET_BEGIN ANNOTATE_UNRET_BEGIN +#else +#define VALIDATE_UNRET_BEGIN +#endif + #endif /* _LINUX_OBJTOOL_H */ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 453a4f4ef39d..df5d9fa84dba 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -54,4 +54,16 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/include/linux/of.h b/include/linux/of.h index f921786cb8ac..a62154aeda1b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -67,7 +67,7 @@ struct device_node { #endif }; -#define MAX_PHANDLE_ARGS 16 +#define MAX_PHANDLE_ARGS NR_FWNODE_REFERENCE_ARGS struct of_phandle_args { struct device_node *np; int args_count; @@ -301,6 +301,8 @@ extern struct device_node *of_get_compatible_child(const struct device_node *par const char *compatible); extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); +extern struct device_node *of_get_available_child_by_name(const struct device_node *node, + const char *name); /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); @@ -311,8 +313,12 @@ extern struct device_node *of_find_node_with_property( extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); +extern bool of_property_read_bool(const struct device_node *np, const char *propname); extern int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size); +extern int of_property_read_u16_index(const struct device_node *np, + const char *propname, + u32 index, u16 *out_value); extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); @@ -397,7 +403,6 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it, uint32_t *args, int size); -extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)); extern int of_alias_get_id(const struct device_node *np, const char *stem); extern int of_alias_get_highest_id(const char *stem); @@ -578,6 +583,13 @@ static inline struct device_node *of_get_child_by_name( return NULL; } +static inline struct device_node *of_get_available_child_by_name( + const struct device_node *node, + const char *name) +{ + return NULL; +} + static inline int of_device_is_compatible(const struct device_node *device, const char *name) { @@ -615,12 +627,24 @@ static inline struct device_node *of_find_compatible_node( return NULL; } +static inline bool of_property_read_bool(const struct device_node *np, + const char *propname) +{ + return false; +} + static inline int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { return -ENOSYS; } +static inline int of_property_read_u16_index(const struct device_node *np, + const char *propname, u32 index, u16 *out_value) +{ + return -ENOSYS; +} + static inline int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { @@ -902,12 +926,6 @@ static inline const void *of_device_get_match_data(const struct device *dev) #define of_node_cmp(s1, s2) strcasecmp((s1), (s2)) #endif -static inline int of_prop_val_eq(const struct property *p1, const struct property *p2) -{ - return p1->length == p2->length && - !memcmp(p1->value, p2->value, (size_t)p1->length); -} - #define for_each_property_of_node(dn, pp) \ for (pp = dn->properties; pp != NULL; pp = pp->next) @@ -1243,24 +1261,6 @@ static inline int of_property_read_string_index(const struct device_node *np, } /** - * of_property_read_bool - Find a property - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * - * Search for a boolean property in a device node. Usage on non-boolean - * property types is deprecated. - * - * Return: true if the property exists false otherwise. - */ -static inline bool of_property_read_bool(const struct device_node *np, - const char *propname) -{ - const struct property *prop = of_find_property(np, propname, NULL); - - return prop ? true : false; -} - -/** * of_property_present - Test if a property is present in a node * @np: device node to search for the property. * @propname: name of the property to be searched. @@ -1271,7 +1271,9 @@ static inline bool of_property_read_bool(const struct device_node *np, */ static inline bool of_property_present(const struct device_node *np, const char *propname) { - return of_property_read_bool(np, propname); + struct property *prop = of_find_property(np, propname, NULL); + + return prop ? true : false; } /** diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 9e034363788a..0cff90365391 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -26,6 +26,7 @@ struct of_pci_range { u64 bus_addr; }; u64 cpu_addr; + u64 parent_bus_addr; u64 size; u32 flags; }; diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index a2ff1ad48f7f..17471ef8e092 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -47,8 +47,6 @@ struct of_dev_auxdata { { .compatible = _compat, .phys_addr = _phys, .name = _name, \ .platform_data = _pdata } -extern const struct of_device_id of_default_bus_match_table[]; - /* Platform drivers register/unregister */ extern struct platform_device *of_device_alloc(struct device_node *np, const char *bus_id, diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index e338282da652..f573423359f4 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -7,6 +7,7 @@ struct of_phandle_args; struct reserved_mem_ops; +struct resource; struct reserved_mem { const char *name; @@ -39,6 +40,12 @@ int of_reserved_mem_device_init_by_name(struct device *dev, void of_reserved_mem_device_release(struct device *dev); struct reserved_mem *of_reserved_mem_lookup(struct device_node *np); +int of_reserved_mem_region_to_resource(const struct device_node *np, + unsigned int idx, struct resource *res); +int of_reserved_mem_region_to_resource_byname(const struct device_node *np, + const char *name, struct resource *res); +int of_reserved_mem_region_count(const struct device_node *np); + #else #define RESERVEDMEM_OF_DECLARE(name, compat, init) \ @@ -63,6 +70,25 @@ static inline struct reserved_mem *of_reserved_mem_lookup(struct device_node *np { return NULL; } + +static inline int of_reserved_mem_region_to_resource(const struct device_node *np, + unsigned int idx, + struct resource *res) +{ + return -ENOSYS; +} + +static inline int of_reserved_mem_region_to_resource_byname(const struct device_node *np, + const char *name, + struct resource *res) +{ + return -ENOSYS; +} + +static inline int of_reserved_mem_region_count(const struct device_node *np) +{ + return 0; +} #endif /** diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 6f9242259edc..6de479ebbe5d 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -151,6 +151,5 @@ enum OID { extern enum OID look_up_OID(const void *data, size_t datasize); extern int parse_OID(const void *data, size_t datasize, enum OID *oid); extern int sprint_oid(const void *, size_t, char *, size_t); -extern int sprint_OID(enum OID, char *, size_t); #endif /* _LINUX_OID_REGISTRY_H */ diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index c9e3843d2dd5..263b915df1fb 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -66,10 +66,6 @@ extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct device_node; -extern int gpmc_get_client_irq(unsigned irq_config); - -extern unsigned int gpmc_ticks_to_ns(unsigned int ticks); - extern void gpmc_cs_write_reg(int cs, int idx, u32 val); extern int gpmc_calc_divider(unsigned int sync_clk); extern int gpmc_cs_set_timings(int cs, const struct gpmc_timings *t, diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0c7e3dcfe867..154ed0dbb43f 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -389,25 +389,38 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) struct_size((type *)NULL, member, count) /** - * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. - * Enables caller macro to pass (different) initializer. + * __DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass arbitrary trailing expressions * * @type: structure type name, including "struct" keyword. * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: initializer expression (could be empty for no init). + * @trailer: Trailing expressions for attributes and/or initializers. */ -#define _DEFINE_FLEX(type, name, member, count, initializer...) \ +#define __DEFINE_FLEX(type, name, member, count, trailer...) \ _Static_assert(__builtin_constant_p(count), \ "onstack flex array members require compile-time const count"); \ union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u initializer; \ + } name##_u trailer; \ type *name = (type *)&name##_u /** + * _DEFINE_FLEX() - helper macro for DEFINE_FLEX() family. + * Enables caller macro to pass (different) initializer. + * + * @type: structure type name, including "struct" keyword. + * @name: Name for a variable to define. + * @member: Name of the array member. + * @count: Number of elements in the array; must be compile-time const. + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). + */ +#define _DEFINE_FLEX(type, name, member, count, initializer...) \ + __DEFINE_FLEX(type, name, member, count, = { .obj initializer }) + +/** * DEFINE_RAW_FLEX() - Define an on-stack instance of structure with a trailing * flexible array member, when it does not have a __counted_by annotation. * @@ -419,9 +432,12 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Define a zeroed, on-stack, instance of @type structure with a trailing * flexible array member. * Use __struct_size(@name) to get compile-time size of it afterwards. + * Use __member_size(@name->member) to get compile-time size of @name members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ - _DEFINE_FLEX(type, name, member, count, = {}) + __DEFINE_FLEX(type, name, member, count, = { }) /** * DEFINE_FLEX() - Define an on-stack instance of structure with a trailing @@ -436,8 +452,22 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Define a zeroed, on-stack, instance of @TYPE structure with a trailing * flexible array member. * Use __struct_size(@NAME) to get compile-time size of it afterwards. + * Use __member_size(@NAME->member) to get compile-time size of @NAME members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ - _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, }) + +/** + * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family. + * Returns the number of elements in @array. + * + * @name: Name for a variable defined in DEFINE_RAW_FLEX()/DEFINE_FLEX(). + * @array: Name of the array member. + */ +#define STACK_FLEX_ARRAY_SIZE(name, array) \ + (__member_size((name)->array) / sizeof(*(name)->array) + \ + __must_be_array((name)->array)) #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/packing.h b/include/linux/packing.h index 5d36dcd06f60..20ae4d452c7b 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -5,8 +5,89 @@ #ifndef _LINUX_PACKING_H #define _LINUX_PACKING_H -#include <linux/types.h> +#include <linux/array_size.h> #include <linux/bitops.h> +#include <linux/build_bug.h> +#include <linux/minmax.h> +#include <linux/stddef.h> +#include <linux/types.h> + +#define GEN_PACKED_FIELD_STRUCT(__type) \ + struct packed_field_ ## __type { \ + __type startbit; \ + __type endbit; \ + __type offset; \ + __type size; \ + } + +/* struct packed_field_u8. Use with bit offsets < 256, buffers < 32B and + * unpacked structures < 256B. + */ +GEN_PACKED_FIELD_STRUCT(u8); + +/* struct packed_field_u16. Use with bit offsets < 65536, buffers < 8KB and + * unpacked structures < 64KB. + */ +GEN_PACKED_FIELD_STRUCT(u16); + +#define PACKED_FIELD(start, end, struct_name, struct_field) \ +{ \ + (start), \ + (end), \ + offsetof(struct_name, struct_field), \ + sizeof_field(struct_name, struct_field), \ +} + +#define CHECK_PACKED_FIELD_OVERLAP(fields, index1, index2) ({ \ + typeof(&(fields)[0]) __f = (fields); \ + typeof(__f[0]) _f1 = __f[index1]; typeof(__f[0]) _f2 = __f[index2]; \ + const bool _ascending = __f[0].startbit < __f[1].startbit; \ + BUILD_BUG_ON_MSG(_ascending && _f1.startbit >= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks ascending order"); \ + BUILD_BUG_ON_MSG(!_ascending && _f1.startbit <= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks descending order"); \ + BUILD_BUG_ON_MSG(max(_f1.endbit, _f2.endbit) <= \ + min(_f1.startbit, _f2.startbit), \ + __stringify(fields) " field " __stringify(index2) \ + " overlaps with previous field"); \ +}) + +#define CHECK_PACKED_FIELD(fields, index) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(_f[0]) __f = _f[index]; \ + BUILD_BUG_ON_MSG(__f.startbit < __f.endbit, \ + __stringify(fields) " field " __stringify(index) \ + " start bit must not be smaller than end bit"); \ + BUILD_BUG_ON_MSG(__f.size != 1 && __f.size != 2 && \ + __f.size != 4 && __f.size != 8, \ + __stringify(fields) " field " __stringify(index) \ + " has unsupported unpacked storage size"); \ + BUILD_BUG_ON_MSG(__f.startbit - __f.endbit >= BITS_PER_BYTE * __f.size, \ + __stringify(fields) " field " __stringify(index) \ + " exceeds unpacked storage size"); \ + __builtin_choose_expr(index != 0, \ + CHECK_PACKED_FIELD_OVERLAP(fields, index - 1, index), \ + 1); \ +}) + +/* Note that the packed fields may be either in ascending or descending order. + * Thus, we must check that both the first and last field wit within the + * packed buffer size. + */ +#define CHECK_PACKED_FIELDS_SIZE(fields, pbuflen) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(pbuflen) _len = (pbuflen); \ + const size_t num_fields = ARRAY_SIZE(fields); \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_len), \ + __stringify(fields) " pbuflen " __stringify(pbuflen) \ + " must be a compile time constant"); \ + BUILD_BUG_ON_MSG(_f[0].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " first field exceeds packed buffer size"); \ + BUILD_BUG_ON_MSG(_f[num_fields - 1].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " last field exceeds packed buffer size"); \ +}) #define QUIRK_MSB_ON_THE_RIGHT BIT(0) #define QUIRK_LITTLE_ENDIAN BIT(1) @@ -26,4 +107,352 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, size_t pbuflen, u8 quirks); +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +/* Do not hand-edit the following packed field check macros! + * + * They are generated using scripts/gen_packed_field_checks.c, which may be + * built via "make scripts_gen_packed_field_checks". If larger macro sizes are + * needed in the future, please use this program to re-generate the macros and + * insert them here. + */ + +#define CHECK_PACKED_FIELDS_1(fields) \ + CHECK_PACKED_FIELD(fields, 0) + +#define CHECK_PACKED_FIELDS_2(fields) do { \ + CHECK_PACKED_FIELDS_1(fields); \ + CHECK_PACKED_FIELD(fields, 1); \ +} while (0) + +#define CHECK_PACKED_FIELDS_3(fields) do { \ + CHECK_PACKED_FIELDS_2(fields); \ + CHECK_PACKED_FIELD(fields, 2); \ +} while (0) + +#define CHECK_PACKED_FIELDS_4(fields) do { \ + CHECK_PACKED_FIELDS_3(fields); \ + CHECK_PACKED_FIELD(fields, 3); \ +} while (0) + +#define CHECK_PACKED_FIELDS_5(fields) do { \ + CHECK_PACKED_FIELDS_4(fields); \ + CHECK_PACKED_FIELD(fields, 4); \ +} while (0) + +#define CHECK_PACKED_FIELDS_6(fields) do { \ + CHECK_PACKED_FIELDS_5(fields); \ + CHECK_PACKED_FIELD(fields, 5); \ +} while (0) + +#define CHECK_PACKED_FIELDS_7(fields) do { \ + CHECK_PACKED_FIELDS_6(fields); \ + CHECK_PACKED_FIELD(fields, 6); \ +} while (0) + +#define CHECK_PACKED_FIELDS_8(fields) do { \ + CHECK_PACKED_FIELDS_7(fields); \ + CHECK_PACKED_FIELD(fields, 7); \ +} while (0) + +#define CHECK_PACKED_FIELDS_9(fields) do { \ + CHECK_PACKED_FIELDS_8(fields); \ + CHECK_PACKED_FIELD(fields, 8); \ +} while (0) + +#define CHECK_PACKED_FIELDS_10(fields) do { \ + CHECK_PACKED_FIELDS_9(fields); \ + CHECK_PACKED_FIELD(fields, 9); \ +} while (0) + +#define CHECK_PACKED_FIELDS_11(fields) do { \ + CHECK_PACKED_FIELDS_10(fields); \ + CHECK_PACKED_FIELD(fields, 10); \ +} while (0) + +#define CHECK_PACKED_FIELDS_12(fields) do { \ + CHECK_PACKED_FIELDS_11(fields); \ + CHECK_PACKED_FIELD(fields, 11); \ +} while (0) + +#define CHECK_PACKED_FIELDS_13(fields) do { \ + CHECK_PACKED_FIELDS_12(fields); \ + CHECK_PACKED_FIELD(fields, 12); \ +} while (0) + +#define CHECK_PACKED_FIELDS_14(fields) do { \ + CHECK_PACKED_FIELDS_13(fields); \ + CHECK_PACKED_FIELD(fields, 13); \ +} while (0) + +#define CHECK_PACKED_FIELDS_15(fields) do { \ + CHECK_PACKED_FIELDS_14(fields); \ + CHECK_PACKED_FIELD(fields, 14); \ +} while (0) + +#define CHECK_PACKED_FIELDS_16(fields) do { \ + CHECK_PACKED_FIELDS_15(fields); \ + CHECK_PACKED_FIELD(fields, 15); \ +} while (0) + +#define CHECK_PACKED_FIELDS_17(fields) do { \ + CHECK_PACKED_FIELDS_16(fields); \ + CHECK_PACKED_FIELD(fields, 16); \ +} while (0) + +#define CHECK_PACKED_FIELDS_18(fields) do { \ + CHECK_PACKED_FIELDS_17(fields); \ + CHECK_PACKED_FIELD(fields, 17); \ +} while (0) + +#define CHECK_PACKED_FIELDS_19(fields) do { \ + CHECK_PACKED_FIELDS_18(fields); \ + CHECK_PACKED_FIELD(fields, 18); \ +} while (0) + +#define CHECK_PACKED_FIELDS_20(fields) do { \ + CHECK_PACKED_FIELDS_19(fields); \ + CHECK_PACKED_FIELD(fields, 19); \ +} while (0) + +#define CHECK_PACKED_FIELDS_21(fields) do { \ + CHECK_PACKED_FIELDS_20(fields); \ + CHECK_PACKED_FIELD(fields, 20); \ +} while (0) + +#define CHECK_PACKED_FIELDS_22(fields) do { \ + CHECK_PACKED_FIELDS_21(fields); \ + CHECK_PACKED_FIELD(fields, 21); \ +} while (0) + +#define CHECK_PACKED_FIELDS_23(fields) do { \ + CHECK_PACKED_FIELDS_22(fields); \ + CHECK_PACKED_FIELD(fields, 22); \ +} while (0) + +#define CHECK_PACKED_FIELDS_24(fields) do { \ + CHECK_PACKED_FIELDS_23(fields); \ + CHECK_PACKED_FIELD(fields, 23); \ +} while (0) + +#define CHECK_PACKED_FIELDS_25(fields) do { \ + CHECK_PACKED_FIELDS_24(fields); \ + CHECK_PACKED_FIELD(fields, 24); \ +} while (0) + +#define CHECK_PACKED_FIELDS_26(fields) do { \ + CHECK_PACKED_FIELDS_25(fields); \ + CHECK_PACKED_FIELD(fields, 25); \ +} while (0) + +#define CHECK_PACKED_FIELDS_27(fields) do { \ + CHECK_PACKED_FIELDS_26(fields); \ + CHECK_PACKED_FIELD(fields, 26); \ +} while (0) + +#define CHECK_PACKED_FIELDS_28(fields) do { \ + CHECK_PACKED_FIELDS_27(fields); \ + CHECK_PACKED_FIELD(fields, 27); \ +} while (0) + +#define CHECK_PACKED_FIELDS_29(fields) do { \ + CHECK_PACKED_FIELDS_28(fields); \ + CHECK_PACKED_FIELD(fields, 28); \ +} while (0) + +#define CHECK_PACKED_FIELDS_30(fields) do { \ + CHECK_PACKED_FIELDS_29(fields); \ + CHECK_PACKED_FIELD(fields, 29); \ +} while (0) + +#define CHECK_PACKED_FIELDS_31(fields) do { \ + CHECK_PACKED_FIELDS_30(fields); \ + CHECK_PACKED_FIELD(fields, 30); \ +} while (0) + +#define CHECK_PACKED_FIELDS_32(fields) do { \ + CHECK_PACKED_FIELDS_31(fields); \ + CHECK_PACKED_FIELD(fields, 31); \ +} while (0) + +#define CHECK_PACKED_FIELDS_33(fields) do { \ + CHECK_PACKED_FIELDS_32(fields); \ + CHECK_PACKED_FIELD(fields, 32); \ +} while (0) + +#define CHECK_PACKED_FIELDS_34(fields) do { \ + CHECK_PACKED_FIELDS_33(fields); \ + CHECK_PACKED_FIELD(fields, 33); \ +} while (0) + +#define CHECK_PACKED_FIELDS_35(fields) do { \ + CHECK_PACKED_FIELDS_34(fields); \ + CHECK_PACKED_FIELD(fields, 34); \ +} while (0) + +#define CHECK_PACKED_FIELDS_36(fields) do { \ + CHECK_PACKED_FIELDS_35(fields); \ + CHECK_PACKED_FIELD(fields, 35); \ +} while (0) + +#define CHECK_PACKED_FIELDS_37(fields) do { \ + CHECK_PACKED_FIELDS_36(fields); \ + CHECK_PACKED_FIELD(fields, 36); \ +} while (0) + +#define CHECK_PACKED_FIELDS_38(fields) do { \ + CHECK_PACKED_FIELDS_37(fields); \ + CHECK_PACKED_FIELD(fields, 37); \ +} while (0) + +#define CHECK_PACKED_FIELDS_39(fields) do { \ + CHECK_PACKED_FIELDS_38(fields); \ + CHECK_PACKED_FIELD(fields, 38); \ +} while (0) + +#define CHECK_PACKED_FIELDS_40(fields) do { \ + CHECK_PACKED_FIELDS_39(fields); \ + CHECK_PACKED_FIELD(fields, 39); \ +} while (0) + +#define CHECK_PACKED_FIELDS_41(fields) do { \ + CHECK_PACKED_FIELDS_40(fields); \ + CHECK_PACKED_FIELD(fields, 40); \ +} while (0) + +#define CHECK_PACKED_FIELDS_42(fields) do { \ + CHECK_PACKED_FIELDS_41(fields); \ + CHECK_PACKED_FIELD(fields, 41); \ +} while (0) + +#define CHECK_PACKED_FIELDS_43(fields) do { \ + CHECK_PACKED_FIELDS_42(fields); \ + CHECK_PACKED_FIELD(fields, 42); \ +} while (0) + +#define CHECK_PACKED_FIELDS_44(fields) do { \ + CHECK_PACKED_FIELDS_43(fields); \ + CHECK_PACKED_FIELD(fields, 43); \ +} while (0) + +#define CHECK_PACKED_FIELDS_45(fields) do { \ + CHECK_PACKED_FIELDS_44(fields); \ + CHECK_PACKED_FIELD(fields, 44); \ +} while (0) + +#define CHECK_PACKED_FIELDS_46(fields) do { \ + CHECK_PACKED_FIELDS_45(fields); \ + CHECK_PACKED_FIELD(fields, 45); \ +} while (0) + +#define CHECK_PACKED_FIELDS_47(fields) do { \ + CHECK_PACKED_FIELDS_46(fields); \ + CHECK_PACKED_FIELD(fields, 46); \ +} while (0) + +#define CHECK_PACKED_FIELDS_48(fields) do { \ + CHECK_PACKED_FIELDS_47(fields); \ + CHECK_PACKED_FIELD(fields, 47); \ +} while (0) + +#define CHECK_PACKED_FIELDS_49(fields) do { \ + CHECK_PACKED_FIELDS_48(fields); \ + CHECK_PACKED_FIELD(fields, 48); \ +} while (0) + +#define CHECK_PACKED_FIELDS_50(fields) do { \ + CHECK_PACKED_FIELDS_49(fields); \ + CHECK_PACKED_FIELD(fields, 49); \ +} while (0) + +#define CHECK_PACKED_FIELDS(fields) \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 1, ({ CHECK_PACKED_FIELDS_1(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 2, ({ CHECK_PACKED_FIELDS_2(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 3, ({ CHECK_PACKED_FIELDS_3(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 4, ({ CHECK_PACKED_FIELDS_4(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 5, ({ CHECK_PACKED_FIELDS_5(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 6, ({ CHECK_PACKED_FIELDS_6(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 7, ({ CHECK_PACKED_FIELDS_7(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 8, ({ CHECK_PACKED_FIELDS_8(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 9, ({ CHECK_PACKED_FIELDS_9(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 10, ({ CHECK_PACKED_FIELDS_10(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 11, ({ CHECK_PACKED_FIELDS_11(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 12, ({ CHECK_PACKED_FIELDS_12(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 13, ({ CHECK_PACKED_FIELDS_13(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 14, ({ CHECK_PACKED_FIELDS_14(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 15, ({ CHECK_PACKED_FIELDS_15(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 16, ({ CHECK_PACKED_FIELDS_16(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 17, ({ CHECK_PACKED_FIELDS_17(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 18, ({ CHECK_PACKED_FIELDS_18(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 19, ({ CHECK_PACKED_FIELDS_19(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 20, ({ CHECK_PACKED_FIELDS_20(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 21, ({ CHECK_PACKED_FIELDS_21(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 22, ({ CHECK_PACKED_FIELDS_22(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 23, ({ CHECK_PACKED_FIELDS_23(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 24, ({ CHECK_PACKED_FIELDS_24(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 25, ({ CHECK_PACKED_FIELDS_25(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 26, ({ CHECK_PACKED_FIELDS_26(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 27, ({ CHECK_PACKED_FIELDS_27(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 28, ({ CHECK_PACKED_FIELDS_28(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 29, ({ CHECK_PACKED_FIELDS_29(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 30, ({ CHECK_PACKED_FIELDS_30(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 31, ({ CHECK_PACKED_FIELDS_31(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 32, ({ CHECK_PACKED_FIELDS_32(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 33, ({ CHECK_PACKED_FIELDS_33(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 34, ({ CHECK_PACKED_FIELDS_34(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 35, ({ CHECK_PACKED_FIELDS_35(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 36, ({ CHECK_PACKED_FIELDS_36(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 37, ({ CHECK_PACKED_FIELDS_37(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 38, ({ CHECK_PACKED_FIELDS_38(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 39, ({ CHECK_PACKED_FIELDS_39(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 40, ({ CHECK_PACKED_FIELDS_40(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 41, ({ CHECK_PACKED_FIELDS_41(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 42, ({ CHECK_PACKED_FIELDS_42(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 43, ({ CHECK_PACKED_FIELDS_43(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 44, ({ CHECK_PACKED_FIELDS_44(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 45, ({ CHECK_PACKED_FIELDS_45(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 46, ({ CHECK_PACKED_FIELDS_46(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 47, ({ CHECK_PACKED_FIELDS_47(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 48, ({ CHECK_PACKED_FIELDS_48(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 49, ({ CHECK_PACKED_FIELDS_49(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 50, ({ CHECK_PACKED_FIELDS_50(fields); }), \ + ({ BUILD_BUG_ON_MSG(1, "CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than 50."); }) \ +)))))))))))))))))))))))))))))))))))))))))))))))))) + +/* End of generated content */ + +#define pack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : pack_fields_u8, \ + const struct packed_field_u16 * : pack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + +#define unpack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : unpack_fields_u8, \ + const struct packed_field_u16 * : unpack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + #endif diff --git a/include/linux/padata.h b/include/linux/padata.h index 0146daf34430..765f2778e264 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -90,8 +90,6 @@ struct padata_cpumask { * @processed: Number of already processed objects. * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. - * @reorder_work: work struct for reordering. - * @lock: Reorder lock. */ struct parallel_data { struct padata_shell *ps; @@ -102,8 +100,6 @@ struct parallel_data { unsigned int processed; int cpu; struct padata_cpumask cpumask; - struct work_struct reorder_work; - spinlock_t ____cacheline_aligned lock; }; /** diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 4f5c9e979bb9..760006b1c480 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -72,8 +72,10 @@ #define NODE_NOT_IN_PAGE_FLAGS 1 #endif -#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) +#if defined(CONFIG_KASAN_SW_TAGS) #define KASAN_TAG_WIDTH 8 +#elif defined(CONFIG_KASAN_HW_TAGS) +#define KASAN_TAG_WIDTH 4 #else #define KASAN_TAG_WIDTH 0 #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 691506bdf2c5..4fe5ee67535b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -110,6 +110,7 @@ enum pageflags { PG_reclaim, /* To be reclaimed asap */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ + PG_dropbehind, /* drop pages on IO completion */ #ifdef CONFIG_MMU PG_mlocked, /* Page is vma mlocked */ #endif @@ -225,11 +226,48 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page } return page; } + +static __always_inline bool page_count_writable(const struct page *page, int u) +{ + if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key)) + return true; + + /* + * The refcount check is ordered before the fake-head check to prevent + * the following race: + * CPU 1 (HVO) CPU 2 (speculative PFN walker) + * + * page_ref_freeze() + * synchronize_rcu() + * rcu_read_lock() + * page_is_fake_head() is false + * vmemmap_remap_pte() + * XXX: struct page[] becomes r/o + * + * page_ref_unfreeze() + * page_ref_count() is not zero + * + * atomic_add_unless(&page->_refcount) + * XXX: try to modify r/o struct page[] + * + * The refcount check also prevents modification attempts to other (r/o) + * tail pages that are not fake heads. + */ + if (atomic_read_acquire(&page->_refcount) == u) + return false; + + return page_fixed_fake_head(page) == page; +} #else static inline const struct page *page_fixed_fake_head(const struct page *page) { return page; } + +static inline bool page_count_writable(const struct page *page, int u) +{ + return true; +} #endif static __always_inline int page_is_fake_head(const struct page *page) @@ -562,6 +600,10 @@ PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL) FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) FOLIO_TEST_CLEAR_FLAG(readahead, FOLIO_HEAD_PAGE) +FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) + FOLIO_TEST_CLEAR_FLAG(dropbehind, FOLIO_HEAD_PAGE) + __FOLIO_SET_FLAG(dropbehind, FOLIO_HEAD_PAGE) + #ifdef CONFIG_HIGHMEM /* * Must use a macro here due to header dependency issues. page_zone() is not @@ -573,6 +615,13 @@ FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { @@ -668,12 +717,6 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #define PAGE_MAPPING_KSM (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE) -/* - * Different with flags above, this flag is used only for fsdax mode. It - * indicates that this page->mapping is now under reflink case. - */ -#define PAGE_MAPPING_DAX_SHARED ((void *)0x1) - static __always_inline bool folio_mapping_flags(const struct folio *folio) { return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) != 0; @@ -872,20 +915,6 @@ FOLIO_FLAG_FALSE(partially_mapped) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* - * PageHuge() only returns true for hugetlbfs pages, but not for - * normal or transparent huge pages. - * - * PageTransHuge() returns true for both transparent huge and - * hugetlbfs pages, but not normal pages. PageTransHuge() can only be - * called only in the core VM paths where hugetlbfs pages can't exist. - */ -static inline int PageTransHuge(const struct page *page) -{ - VM_BUG_ON_PAGE(PageTail(page), page); - return PageHead(page); -} - -/* * PageTransCompound returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known * that hugetlbfs pages aren't involved. @@ -894,21 +923,8 @@ static inline int PageTransCompound(const struct page *page) { return PageCompound(page); } - -/* - * PageTransTail returns true for both transparent huge pages - * and hugetlbfs pages, so it should only be called when it's known - * that hugetlbfs pages aren't involved. - */ -static inline int PageTransTail(const struct page *page) -{ - return PageTail(page); -} #else -TESTPAGEFLAG_FALSE(TransHuge, transhuge) TESTPAGEFLAG_FALSE(TransCompound, transcompound) -TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap) -TESTPAGEFLAG_FALSE(TransTail, transtail) #endif #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) @@ -918,11 +934,9 @@ TESTPAGEFLAG_FALSE(TransTail, transtail) * * This flag is set by hwpoison handler. Cleared by THP split or free page. */ -PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) - TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +FOLIO_FLAG(has_hwpoisoned, FOLIO_SECOND_PAGE) #else -PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) - TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) +FOLIO_FLAG_FALSE(has_hwpoisoned) #endif /* @@ -934,14 +948,15 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) enum pagetype { /* 0x00-0x7f are positive numbers, ie mapcount */ /* Reserve 0x80-0xef for mapcount overflow. */ - PGTY_buddy = 0xf0, - PGTY_offline = 0xf1, - PGTY_table = 0xf2, - PGTY_guard = 0xf3, - PGTY_hugetlb = 0xf4, - PGTY_slab = 0xf5, - PGTY_zsmalloc = 0xf6, - PGTY_unaccepted = 0xf7, + PGTY_buddy = 0xf0, + PGTY_offline = 0xf1, + PGTY_table = 0xf2, + PGTY_guard = 0xf3, + PGTY_hugetlb = 0xf4, + PGTY_slab = 0xf5, + PGTY_zsmalloc = 0xf6, + PGTY_unaccepted = 0xf7, + PGTY_large_kmalloc = 0xf8, PGTY_mapcount_underflow = 0xff }; @@ -959,7 +974,7 @@ static inline bool page_mapcount_is_type(unsigned int mapcount) static inline bool page_has_type(const struct page *page) { - return page_mapcount_is_type(data_race(page->page_type)); + return page_type_has_type(data_race(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ @@ -1084,6 +1099,7 @@ PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) * Serialized with zone lock. */ PAGE_TYPE_OPS(Unaccepted, unaccepted, unaccepted) +FOLIO_TYPE_OPS(large_kmalloc, large_kmalloc) /** * PageHuge - Determine if the page belongs to hugetlbfs @@ -1113,6 +1129,12 @@ static inline bool is_page_hwpoison(const struct page *page) return folio_test_hugetlb(folio) && PageHWPoison(&folio->page); } +static inline bool folio_contain_hwpoisoned_page(struct folio *folio) +{ + return folio_test_hwpoison(folio) || + (folio_test_large(folio) && folio_test_has_hwpoisoned(folio)); +} + bool is_free_buddy_page(const struct page *page); PAGEFLAG(Isolated, isolated, PF_ANY); diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 73dc2c1841ec..898bb788243b 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -31,7 +31,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page, int migratetype); int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - int migratetype, int flags, gfp_t gfp_flags); + int migratetype, int flags); void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, int migratetype); diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 79dbd8bc35a7..d649b6bbbc87 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -9,10 +9,12 @@ struct page_counter { /* - * Make sure 'usage' does not share cacheline with any other field. The - * memcg->memory.usage is a hot member of struct mem_cgroup. + * Make sure 'usage' does not share cacheline with any other field in + * v2. The memcg->memory.usage is a hot member of struct mem_cgroup. */ atomic_long_t usage; + unsigned long failcnt; /* v1-only field */ + CACHELINE_PADDING(_pad1_); /* effective memory.min and memory.min usage tracking */ @@ -28,12 +30,12 @@ struct page_counter { unsigned long watermark; /* Latest cg2 reset watermark */ unsigned long local_watermark; - unsigned long failcnt; /* Keep all the read most fields in a separete cacheline. */ CACHELINE_PADDING(_pad2_); bool protection_support; + bool track_failcnt; unsigned long min; unsigned long low; unsigned long high; @@ -58,6 +60,7 @@ static inline void page_counter_init(struct page_counter *counter, counter->max = PAGE_COUNTER_MAX; counter->parent = parent; counter->protection_support = protection_support; + counter->track_failcnt = false; } static inline unsigned long page_counter_read(struct page_counter *counter) @@ -96,7 +99,7 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) counter->watermark = usage; } -#ifdef CONFIG_MEMCG +#if IS_ENABLED(CONFIG_MEMCG) || IS_ENABLED(CONFIG_CGROUP_DMEM) void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection); diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index e4b48a0dda24..76c817162d2f 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -3,6 +3,7 @@ #define __LINUX_PAGE_EXT_H #include <linux/types.h> +#include <linux/mmzone.h> #include <linux/stacktrace.h> struct pglist_data; @@ -69,16 +70,31 @@ extern void page_ext_init(void); static inline void page_ext_init_flatmem_late(void) { } + +static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn) +{ + /* + * page_ext is allocated per memory section. Once we cross a + * memory section, we have to fetch the new pointer. + */ + return next_pfn % PAGES_PER_SECTION; +} #else extern void page_ext_init_flatmem(void); extern void page_ext_init_flatmem_late(void); static inline void page_ext_init(void) { } + +static inline bool page_ext_iter_next_fast_possible(unsigned long next_pfn) +{ + return true; +} #endif extern struct page_ext *page_ext_get(const struct page *page); extern void page_ext_put(struct page_ext *page_ext); +extern struct page_ext *page_ext_lookup(unsigned long pfn); static inline void *page_ext_data(struct page_ext *page_ext, struct page_ext_operations *ops) @@ -93,6 +109,83 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr) return next; } +struct page_ext_iter { + unsigned long index; + unsigned long start_pfn; + struct page_ext *page_ext; +}; + +/** + * page_ext_iter_begin() - Prepare for iterating through page extensions. + * @iter: page extension iterator. + * @pfn: PFN of the page we're interested in. + * + * Must be called with RCU read lock taken. + * + * Return: NULL if no page_ext exists for this page. + */ +static inline struct page_ext *page_ext_iter_begin(struct page_ext_iter *iter, + unsigned long pfn) +{ + iter->index = 0; + iter->start_pfn = pfn; + iter->page_ext = page_ext_lookup(pfn); + + return iter->page_ext; +} + +/** + * page_ext_iter_next() - Get next page extension + * @iter: page extension iterator. + * + * Must be called with RCU read lock taken. + * + * Return: NULL if no next page_ext exists. + */ +static inline struct page_ext *page_ext_iter_next(struct page_ext_iter *iter) +{ + unsigned long pfn; + + if (WARN_ON_ONCE(!iter->page_ext)) + return NULL; + + iter->index++; + pfn = iter->start_pfn + iter->index; + + if (page_ext_iter_next_fast_possible(pfn)) + iter->page_ext = page_ext_next(iter->page_ext); + else + iter->page_ext = page_ext_lookup(pfn); + + return iter->page_ext; +} + +/** + * page_ext_iter_get() - Get current page extension + * @iter: page extension iterator. + * + * Return: NULL if no page_ext exists for this iterator. + */ +static inline struct page_ext *page_ext_iter_get(const struct page_ext_iter *iter) +{ + return iter->page_ext; +} + +/** + * for_each_page_ext(): iterate through page_ext objects. + * @__page: the page we're interested in + * @__pgcount: how many pages to iterate through + * @__page_ext: struct page_ext pointer where the current page_ext + * object is returned + * @__iter: struct page_ext_iter object (defined in the stack) + * + * IMPORTANT: must be called with RCU read lock taken. + */ +#define for_each_page_ext(__page, __pgcount, __page_ext, __iter) \ + for (__page_ext = page_ext_iter_begin(&__iter, page_to_pfn(__page));\ + __page_ext && __iter.index < __pgcount; \ + __page_ext = page_ext_iter_next(&__iter)) + #else /* !CONFIG_PAGE_EXTENSION */ struct page_ext; diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 8c236c651d1d..544150d1d5fd 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -234,7 +234,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u) rcu_read_lock(); /* avoid writing to the vmemmap area being remapped */ - if (!page_is_fake_head(page) && page_ref_count(page) != u) + if (page_count_writable(page, u)) ret = atomic_add_unless(&page->_refcount, nr, u); rcu_read_unlock(); diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 6722941c7cb8..289620d4aad3 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -19,8 +19,10 @@ void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd); void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud); void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr); -void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd); -void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud); +void __page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd, + unsigned int nr); +void __page_table_check_puds_set(struct mm_struct *mm, pud_t *pudp, pud_t pud, + unsigned int nr); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd); @@ -74,22 +76,22 @@ static inline void page_table_check_ptes_set(struct mm_struct *mm, __page_table_check_ptes_set(mm, ptep, pte, nr); } -static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, - pmd_t pmd) +static inline void page_table_check_pmds_set(struct mm_struct *mm, + pmd_t *pmdp, pmd_t pmd, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pmd_set(mm, pmdp, pmd); + __page_table_check_pmds_set(mm, pmdp, pmd, nr); } -static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, - pud_t pud) +static inline void page_table_check_puds_set(struct mm_struct *mm, + pud_t *pudp, pud_t pud, unsigned int nr) { if (static_branch_likely(&page_table_check_disabled)) return; - __page_table_check_pud_set(mm, pudp, pud); + __page_table_check_puds_set(mm, pudp, pud, nr); } static inline void page_table_check_pte_clear_range(struct mm_struct *mm, @@ -129,13 +131,13 @@ static inline void page_table_check_ptes_set(struct mm_struct *mm, { } -static inline void page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, - pmd_t pmd) +static inline void page_table_check_pmds_set(struct mm_struct *mm, + pmd_t *pmdp, pmd_t pmd, unsigned int nr) { } -static inline void page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, - pud_t pud) +static inline void page_table_check_puds_set(struct mm_struct *mm, + pud_t *pudp, pud_t pud, unsigned int nr) { } @@ -146,4 +148,8 @@ static inline void page_table_check_pte_clear_range(struct mm_struct *mm, } #endif /* CONFIG_PAGE_TABLE_CHECK */ + +#define page_table_check_pmd_set(mm, pmdp, pmd) page_table_check_pmds_set(mm, pmdp, pmd, 1) +#define page_table_check_pud_set(mm, pudp, pud) page_table_check_puds_set(mm, pudp, pud, 1) + #endif /* __LINUX_PAGE_TABLE_CHECK_H */ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index fc6b9c87cb0a..e73a4292ef02 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,18 +41,18 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, PAGE_BLOCK_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, PAGE_BLOCK_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ -#define pageblock_order MAX_PAGE_ORDER +/* If huge pages are not used, group by PAGE_BLOCK_ORDER */ +#define pageblock_order PAGE_BLOCK_ORDER #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bcf0865a38ae..e63fbfbd5b0f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -210,6 +210,7 @@ enum mapping_flags { AS_STABLE_WRITES = 7, /* must wait for writeback before modifying folio contents */ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ + AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM = 9, /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, @@ -335,6 +336,16 @@ static inline bool mapping_inaccessible(struct address_space *mapping) return test_bit(AS_INACCESSIBLE, &mapping->flags); } +static inline void mapping_set_writeback_may_deadlock_on_reclaim(struct address_space *mapping) +{ + set_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); +} + +static inline bool mapping_writeback_may_deadlock_on_reclaim(struct address_space *mapping) +{ + return test_bit(AS_WRITEBACK_MAY_DEADLOCK_ON_RECLAIM, &mapping->flags); +} + static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; @@ -533,27 +544,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) } struct address_space *folio_mapping(struct folio *); -struct address_space *swapcache_mapping(struct folio *); - -/** - * folio_file_mapping - Find the mapping this folio belongs to. - * @folio: The folio. - * - * For folios which are in the page cache, return the mapping that this - * page belongs to. Folios in the swap cache return the mapping of the - * swap file or swap device where the data is stored. This is different - * from the mapping returned by folio_mapping(). The only reason to - * use it is if, like NFS, you return 0 from ->activate_swapfile. - * - * Do not call this for folios which aren't in the page cache or swap cache. - */ -static inline struct address_space *folio_file_mapping(struct folio *folio) -{ - if (unlikely(folio_test_swapcache(folio))) - return swapcache_mapping(folio); - - return folio->mapping; -} /** * folio_flush_mapping - Find the file mapping this folio belongs to. @@ -575,11 +565,6 @@ static inline struct address_space *folio_flush_mapping(struct folio *folio) return folio_mapping(folio); } -static inline struct address_space *page_file_mapping(struct page *page) -{ - return folio_file_mapping(page_folio(page)); -} - /** * folio_inode - Get the host inode for this folio. * @folio: The folio. @@ -710,6 +695,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, * * %FGP_NOFS - __GFP_FS will get cleared in gfp. * * %FGP_NOWAIT - Don't block on the folio lock. * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) + * * %FGP_DONTCACHE - Uncached buffered IO * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() * implementation. */ @@ -723,10 +709,21 @@ typedef unsigned int __bitwise fgf_t; #define FGP_NOWAIT ((__force fgf_t)0x00000020) #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) #define FGP_STABLE ((__force fgf_t)0x00000080) +#define FGP_DONTCACHE ((__force fgf_t)0x00000100) #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) +static inline unsigned int filemap_get_order(size_t size) +{ + unsigned int shift = ilog2(size); + + if (shift <= PAGE_SHIFT) + return 0; + + return shift - PAGE_SHIFT; +} + /** * fgf_set_order - Encode a length in the fgf_t flags. * @size: The suggested size of the folio to create. @@ -740,11 +737,11 @@ typedef unsigned int __bitwise fgf_t; */ static inline fgf_t fgf_set_order(size_t size) { - unsigned int shift = ilog2(size); + unsigned int order = filemap_get_order(size); - if (shift <= PAGE_SHIFT) + if (!order) return 0; - return (__force fgf_t)((shift - PAGE_SHIFT) << 26); + return (__force fgf_t)(order << 26); } void *filemap_get_entry(struct address_space *mapping, pgoff_t index); @@ -897,26 +894,6 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -extern pgoff_t __folio_swap_cache_index(struct folio *folio); - -/** - * folio_index - File index of a folio. - * @folio: The folio. - * - * For a folio which is either in the page cache or the swap cache, - * return its index within the address_space it belongs to. If you know - * the page is definitely in the page cache, you can look at the folio's - * index directly. - * - * Return: The index (offset in units of pages) of a folio in its file. - */ -static inline pgoff_t folio_index(struct folio *folio) -{ - if (unlikely(folio_test_swapcache(folio))) - return __folio_swap_cache_index(folio); - return folio->index; -} - /** * folio_next_index - Get the index of the next folio. * @folio: The current folio. @@ -948,27 +925,14 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) * @folio: The folio. * @index: The page index within the file. * - * Context: The caller should have the page locked in order to prevent - * (eg) shmem from moving the page between the page cache and swap cache - * and changing its index in the middle of the operation. + * Context: The caller should have the folio locked and ensure + * e.g., shmem did not move this folio to the swap cache. * Return: true or false. */ static inline bool folio_contains(struct folio *folio, pgoff_t index) { - return index - folio_index(folio) < folio_nr_pages(folio); -} - -/* - * Given the page we found in the page cache, return the page corresponding - * to this index in the file - */ -static inline struct page *find_subpage(struct page *head, pgoff_t index) -{ - /* HugeTLBfs wants the head page regardless */ - if (PageHuge(head)) - return head; - - return head + (index & (thp_nr_pages(head) - 1)); + VM_WARN_ON_ONCE_FOLIO(folio_test_swapcache(folio), folio); + return index - folio->index < folio_nr_pages(folio); } unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, @@ -978,9 +942,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); -struct page *grab_cache_page_write_begin(struct address_space *mapping, - pgoff_t index); - /* * Returns locked page at given index in given cache, creating it if needed. */ @@ -1032,21 +993,23 @@ static inline pgoff_t page_pgoff(const struct folio *folio, return folio->index + folio_page_idx(folio, page); } -/* - * Return byte-offset into filesystem object for page. +/** + * folio_pos - Returns the byte position of this folio in its file. + * @folio: The folio. */ -static inline loff_t page_offset(struct page *page) +static inline loff_t folio_pos(const struct folio *folio) { - return ((loff_t)page->index) << PAGE_SHIFT; + return ((loff_t)folio->index) * PAGE_SIZE; } -/** - * folio_pos - Returns the byte position of this folio in its file. - * @folio: The folio. +/* + * Return byte-offset into filesystem object for page. */ -static inline loff_t folio_pos(struct folio *folio) +static inline loff_t page_offset(struct page *page) { - return page_offset(&folio->page); + struct folio *folio = page_folio(page); + + return folio_pos(folio) + folio_page_idx(folio, page) * PAGE_SIZE; } /* @@ -1233,18 +1196,12 @@ static inline int folio_wait_locked_killable(struct folio *folio) return folio_wait_bit_killable(folio, PG_locked); } -static inline void wait_on_page_locked(struct page *page) -{ - folio_wait_locked(page_folio(page)); -} - void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); -void wait_for_stable_page(struct page *page); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); @@ -1271,11 +1228,6 @@ void folio_wait_private_2(struct folio *folio); int folio_wait_private_2_killable(struct folio *folio); /* - * Add an arbitrary waiter to a page's wait queue - */ -void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); - -/* * Fault in userspace address range. */ size_t fault_in_writeable(char __user *uaddr, size_t size); @@ -1333,9 +1285,9 @@ static inline bool filemap_range_needs_writeback(struct address_space *mapping, * struct readahead_control - Describes a readahead request. * * A readahead request is for consecutive pages. Filesystems which - * implement the ->readahead method should call readahead_page() or - * readahead_page_batch() in a loop and attempt to start I/O against - * each page in the request. + * implement the ->readahead method should call readahead_folio() or + * __readahead_batch() in a loop and attempt to start reads into each + * folio in the request. * * Most of the fields in this struct are private and should be accessed * by the functions below. @@ -1353,6 +1305,7 @@ struct readahead_control { pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; + bool dropbehind; bool _workingset; unsigned long _pflags; }; @@ -1440,22 +1393,6 @@ static inline struct folio *__readahead_folio(struct readahead_control *ractl) } /** - * readahead_page - Get the next page to read. - * @ractl: The current readahead request. - * - * Context: The page is locked and has an elevated refcount. The caller - * should decreases the refcount once the page has been submitted for I/O - * and unlock the page once all I/O to that page has completed. - * Return: A pointer to the next page, or %NULL if we are done. - */ -static inline struct page *readahead_page(struct readahead_control *ractl) -{ - struct folio *folio = __readahead_folio(ractl); - - return &folio->page; -} - -/** * readahead_folio - Get the next folio to read. * @ractl: The current readahead request. * @@ -1477,7 +1414,7 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, { unsigned int i = 0; XA_STATE(xas, &rac->mapping->i_pages, 0); - struct page *page; + struct folio *folio; BUG_ON(rac->_batch_count > rac->_nr_pages); rac->_nr_pages -= rac->_batch_count; @@ -1486,13 +1423,12 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, xas_set(&xas, rac->_index); rcu_read_lock(); - xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { - if (xas_retry(&xas, page)) + xas_for_each(&xas, folio, rac->_index + rac->_nr_pages - 1) { + if (xas_retry(&xas, folio)) continue; - VM_BUG_ON_PAGE(!PageLocked(page), page); - VM_BUG_ON_PAGE(PageTail(page), page); - array[i++] = page; - rac->_batch_count += thp_nr_pages(page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); + array[i++] = folio_page(folio, 0); + rac->_batch_count += folio_nr_pages(folio); if (i == array_sz) break; } @@ -1502,20 +1438,6 @@ static inline unsigned int __readahead_batch(struct readahead_control *rac, } /** - * readahead_page_batch - Get a batch of pages to read. - * @rac: The current readahead request. - * @array: An array of pointers to struct page. - * - * Context: The pages are locked and have an elevated refcount. The caller - * should decreases the refcount once the page has been submitted for I/O - * and unlock the page once all I/O to that page has completed. - * Return: The number of pages placed in the array. 0 indicates the request - * is complete. - */ -#define readahead_page_batch(rac, array) \ - __readahead_batch(rac, array, ARRAY_SIZE(array)) - -/** * readahead_pos - The byte offset into the file of this readahead request. * @rac: The readahead request. */ @@ -1595,34 +1517,6 @@ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, } /** - * page_mkwrite_check_truncate - check if page was truncated - * @page: the page to check - * @inode: the inode to check the page against - * - * Returns the number of bytes in the page up to EOF, - * or -EFAULT if the page was truncated. - */ -static inline int page_mkwrite_check_truncate(struct page *page, - struct inode *inode) -{ - loff_t size = i_size_read(inode); - pgoff_t index = size >> PAGE_SHIFT; - int offset = offset_in_page(size); - - if (page->mapping != inode->i_mapping) - return -EFAULT; - - /* page is wholly inside EOF */ - if (page->index < index) - return PAGE_SIZE; - /* page is wholly past EOF */ - if (page->index > index || !offset) - return -EFAULT; - /* page is partially inside EOF */ - return offset; -} - -/** * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. * @folio: The folio. diff --git a/include/linux/panic.h b/include/linux/panic.h index 54d90b6c5f47..4adc65766935 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -20,8 +20,6 @@ extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; -extern int panic_on_unrecovered_nmi; -extern int panic_on_io_nmi; extern int panic_on_warn; extern unsigned long panic_on_taint; @@ -74,7 +72,8 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 #define TAINT_TEST 18 -#define TAINT_FLAGS_COUNT 19 +#define TAINT_FWCTL 19 +#define TAINT_FLAGS_COUNT 20 #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index ac8c44dd8237..eeeff2a04529 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -33,7 +33,7 @@ struct disk_stats { #define part_stat_read(part, field) \ ({ \ - typeof((part)->bd_stats->field) res = 0; \ + TYPEOF_UNQUAL((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ @@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value) #define part_stat_local_read_cpu(part, field, cpu) \ local_read(&(part_stat_get_cpu(part, field, cpu))) +unsigned int bdev_count_inflight(struct block_device *part); + #endif /* _LINUX_PART_STAT_H */ diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 0e8b74e63767..75c6c86cf09d 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -42,6 +42,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features); void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_pasid_status(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ static inline int pci_enable_pasid(struct pci_dev *pdev, int features) { return -EINVAL; } @@ -50,6 +51,8 @@ static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev) { return -EINVAL; } +static inline int pci_pasid_status(struct pci_dev *pdev) +{ return -EINVAL; } #endif /* CONFIG_PCI_PASID */ #endif /* LINUX_PCI_ATS_H */ diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index 3a4860bd2758..d930651473b4 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -45,6 +45,10 @@ struct pci_ecam_ops { unsigned int bus_shift; struct pci_ops pci_ops; int (*init)(struct pci_config_window *); + int (*enable_device)(struct pci_host_bridge *, + struct pci_dev *); + void (*disable_device)(struct pci_host_bridge *, + struct pci_dev *); }; /* @@ -89,10 +93,4 @@ extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */ extern const struct pci_ecam_ops tegra194_pcie_ops; /* Tegra194 PCIe */ extern const struct pci_ecam_ops loongson_pci_ecam_ops; /* Loongson PCIe */ #endif - -#if IS_ENABLED(CONFIG_PCI_HOST_COMMON) -/* for DT-based PCI controllers that support ECAM */ -int pci_host_common_probe(struct platform_device *pdev); -void pci_host_common_remove(struct platform_device *pdev); -#endif #endif diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index e818e3fdcded..4286bfdbfdfa 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -100,10 +100,10 @@ struct pci_epc_ops { void (*unmap_addr)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t addr); int (*set_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u8 interrupts); + u8 nr_irqs); int (*get_msi)(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int (*set_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u16 interrupts, enum pci_barno, u32 offset); + u16 nr_irqs, enum pci_barno, u32 offset); int (*get_msix)(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int (*raise_irq)(struct pci_epc *epc, u8 func_no, u8 vfunc_no, unsigned int type, u16 interrupt_num); @@ -188,11 +188,15 @@ struct pci_epc { * enum pci_epc_bar_type - configurability of endpoint BAR * @BAR_PROGRAMMABLE: The BAR mask can be configured by the EPC. * @BAR_FIXED: The BAR mask is fixed by the hardware. + * @BAR_RESIZABLE: The BAR implements the PCI-SIG Resizable BAR Capability. + * NOTE: An EPC driver can currently only set a single supported + * size. * @BAR_RESERVED: The BAR should not be touched by an EPF driver. */ enum pci_epc_bar_type { BAR_PROGRAMMABLE = 0, BAR_FIXED, + BAR_RESIZABLE, BAR_RESERVED, }; @@ -221,6 +225,7 @@ struct pci_epc_bar_desc { * @linkup_notifier: indicate if the EPC device can notify EPF driver on link up * @msi_capable: indicate if the endpoint function has MSI capability * @msix_capable: indicate if the endpoint function has MSI-X capability + * @intx_capable: indicate if the endpoint can raise INTx interrupts * @bar: array specifying the hardware description for each BAR * @align: alignment size required for BAR buffer allocation */ @@ -228,6 +233,7 @@ struct pci_epc_features { unsigned int linkup_notifier : 1; unsigned int msi_capable : 1; unsigned int msix_capable : 1; + unsigned int intx_capable : 1; struct pci_epc_bar_desc bar[PCI_STD_NUM_BARS]; size_t align; }; @@ -257,7 +263,6 @@ __devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, struct pci_epc * __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, struct module *owner); -void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc); void pci_epc_destroy(struct pci_epc *epc); int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf, enum pci_epc_interface_type type); @@ -271,6 +276,7 @@ void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf, enum pci_epc_interface_type type); int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_header *hdr); +int pci_epc_bar_size_to_rebar_cap(size_t size, u32 *cap); int pci_epc_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no, struct pci_epf_bar *epf_bar); void pci_epc_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no, @@ -280,11 +286,10 @@ int pci_epc_map_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u64 pci_addr, size_t size); void pci_epc_unmap_addr(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr); -int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u8 interrupts); +int pci_epc_set_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u8 nr_irqs); int pci_epc_get_msi(struct pci_epc *epc, u8 func_no, u8 vfunc_no); -int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no, - u16 interrupts, enum pci_barno, u32 offset); +int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no, u16 nr_irqs, + enum pci_barno, u32 offset); int pci_epc_get_msix(struct pci_epc *epc, u8 func_no, u8 vfunc_no); int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, u8 vfunc_no, phys_addr_t phys_addr, u8 interrupt_num, diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 18a3aeb62ae4..749cee0bcf2c 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -38,7 +38,7 @@ enum pci_barno { * @baseclass_code: broadly classifies the type of function the device performs * @cache_line_size: specifies the system cacheline size in units of DWORDs * @subsys_vendor_id: vendor of the add-in card or subsystem - * @subsys_id: id specific to vendor + * @subsys_id: ID specific to vendor * @interrupt_pin: interrupt pin the device (or device function) uses */ struct pci_epf_header { @@ -59,7 +59,7 @@ struct pci_epf_header { * @bind: ops to perform when a EPC device has been bound to EPF device * @unbind: ops to perform when a binding has been lost between a EPC device * and EPF device - * @add_cfs: ops to initialize function specific configfs attributes + * @add_cfs: ops to initialize function-specific configfs attributes */ struct pci_epf_ops { int (*bind)(struct pci_epf *epf); @@ -114,6 +114,8 @@ struct pci_epf_driver { * @phys_addr: physical address that should be mapped to the BAR * @addr: virtual address corresponding to the @phys_addr * @size: the size of the address space present in BAR + * @aligned_size: the size actually allocated to accommodate the iATU alignment + * requirement * @barno: BAR number * @flags: flags that are set for the BAR */ @@ -121,6 +123,7 @@ struct pci_epf_bar { dma_addr_t phys_addr; void *addr; size_t size; + size_t aligned_size; enum pci_barno barno; int flags; }; @@ -138,7 +141,7 @@ struct pci_epf_bar { * @epc: the EPC device to which this EPF device is bound * @epf_pf: the physical EPF device to which this virtual EPF device is bound * @driver: the EPF driver to which this EPF device is bound - * @id: Pointer to the EPF device ID + * @id: pointer to the EPF device ID * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc * @lock: mutex to protect pci_epf_ops * @sec_epc: the secondary EPC device to which this EPF device is bound @@ -151,13 +154,13 @@ struct pci_epf_bar { * @is_vf: true - virtual function, false - physical function * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function - * @event_ops: Callbacks for capturing the EPC events + * @event_ops: callbacks for capturing the EPC events */ struct pci_epf { struct device dev; const char *name; struct pci_epf_header *header; - struct pci_epf_bar bar[6]; + struct pci_epf_bar bar[PCI_STD_NUM_BARS]; u8 msi_interrupts; u16 msix_interrupts; u8 func_no; @@ -174,7 +177,7 @@ struct pci_epf { /* Below members are to attach secondary EPC to an endpoint function */ struct pci_epc *sec_epc; struct list_head sec_epc_list; - struct pci_epf_bar sec_epc_bar[6]; + struct pci_epf_bar sec_epc_bar[PCI_STD_NUM_BARS]; u8 sec_epc_func_no; struct config_group *group; unsigned int is_bound; @@ -185,11 +188,12 @@ struct pci_epf { }; /** - * struct pci_epf_msix_tbl - represents the MSIX table entry structure - * @msg_addr: Writes to this address will trigger MSIX interrupt in host - * @msg_data: Data that should be written to @msg_addr to trigger MSIX interrupt + * struct pci_epf_msix_tbl - represents the MSI-X table entry structure + * @msg_addr: Writes to this address will trigger MSI-X interrupt in host + * @msg_data: Data that should be written to @msg_addr to trigger MSI-X + * interrupt * @vector_ctrl: Identifies if the function is prohibited from sending a message - * using this MSIX table entry + * using this MSI-X table entry */ struct pci_epf_msix_tbl { u64 msg_addr; diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 2c07aa6b7665..075c20b161d9 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -104,4 +104,89 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client) return pci_p2pmem_find_many(&client, 1); } +enum pci_p2pdma_map_type { + /* + * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before + * the mapping type has been calculated. Exported routines for the API + * will never return this value. + */ + PCI_P2PDMA_MAP_UNKNOWN = 0, + + /* + * Not a PCI P2PDMA transfer. + */ + PCI_P2PDMA_MAP_NONE, + + /* + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will + * traverse the host bridge and the host bridge is not in the + * allowlist. DMA Mapping routines should return an error when + * this is returned. + */ + PCI_P2PDMA_MAP_NOT_SUPPORTED, + + /* + * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to + * each other directly through a PCI switch and the transaction will + * not traverse the host bridge. Such a mapping should program + * the DMA engine with PCI bus addresses. + */ + PCI_P2PDMA_MAP_BUS_ADDR, + + /* + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk + * to each other, but the transaction traverses a host bridge on the + * allowlist. In this case, a normal mapping either with CPU physical + * addresses (in the case of dma-direct) or IOVA addresses (in the + * case of IOMMUs) should be used to program the DMA engine. + */ + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; + +struct pci_p2pdma_map_state { + struct dev_pagemap *pgmap; + enum pci_p2pdma_map_type map; + u64 bus_off; +}; + +/* helper for pci_p2pdma_state(), do not use directly */ +void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, + struct device *dev, struct page *page); + +/** + * pci_p2pdma_state - check the P2P transfer state of a page + * @state: P2P state structure + * @dev: device to transfer to/from + * @page: page to map + * + * Check if @page is a PCI P2PDMA page, and if yes of what kind. Returns the + * map type, and updates @state with all information needed for a P2P transfer. + */ +static inline enum pci_p2pdma_map_type +pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, + struct page *page) +{ + if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { + if (state->pgmap != page_pgmap(page)) + __pci_p2pdma_update_state(state, dev, page); + return state->map; + } + return PCI_P2PDMA_MAP_NONE; +} + +/** + * pci_p2pdma_bus_addr_map - Translate a physical address to a bus address + * for a PCI_P2PDMA_MAP_BUS_ADDR transfer. + * @state: P2P state structure + * @paddr: physical address to map + * + * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer. + */ +static inline dma_addr_t +pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) +{ + WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); + return paddr + state->bus_off; +} + #endif /* _LINUX_PCI_P2P_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index f05903dd7695..d56d0dd80afb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -245,6 +245,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), /* Device does honor MSI masking despite saying otherwise */ PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), + /* Device requires write to PCI_MSIX_ENTRY_DATA before any MSIX reads */ + PCI_DEV_FLAGS_MSIX_TOUCH_ENTRY_DATA_FIRST = (__force pci_dev_flags_t) (1 << 13), }; enum pci_irq_reroute_variant { @@ -326,6 +328,11 @@ struct rcec_ea; * determined (e.g., for Root Complex Integrated * Endpoints without the relevant Capability * Registers). + * @is_hotplug_bridge: Hotplug bridge of any kind (e.g. PCIe Hot-Plug Capable, + * Conventional PCI Hot-Plug, ACPI slot). + * Such bridges are allocated additional MMIO and bus + * number resources to allow for hierarchy expansion. + * @is_pciehp: PCIe Hot-Plug Capable bridge. */ struct pci_dev { struct list_head bus_list; /* Node in per-bus list */ @@ -346,13 +353,14 @@ struct pci_dev { u8 hdr_type; /* PCI header type (`multi' flag masked out) */ #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ - struct aer_stats *aer_stats; /* AER stats for this device */ + struct aer_info *aer_info; /* AER info for this device */ #endif #ifdef CONFIG_PCIEPORTBUS struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ struct pci_dev *rcec; /* Associated RCEC device */ #endif u32 devcap; /* PCIe Device Capabilities */ + u16 rebar_cap; /* Resizable BAR capability offset */ u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ @@ -407,7 +415,7 @@ struct pci_dev { supported from root to here */ #endif unsigned int pasid_no_tlp:1; /* PASID works without TLP Prefix */ - unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ + unsigned int eetlp_prefix_max:3; /* Max # of End-End TLP Prefixes, 0=not supported */ pci_channel_state_t error_state; /* Current connectivity state */ struct device dev; /* Generic device interface */ @@ -422,8 +430,6 @@ struct pci_dev { struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ struct resource driver_exclusive_resource; /* driver exclusive resource ranges */ - bool match_driver; /* Skip attaching driver */ - unsigned int transparent:1; /* Subtractive decode bridge */ unsigned int io_window:1; /* Bridge has I/O window */ unsigned int pref_window:1; /* Bridge has pref mem window */ @@ -450,6 +456,7 @@ struct pci_dev { unsigned int is_physfn:1; unsigned int is_virtfn:1; unsigned int is_hotplug_bridge:1; + unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ /* @@ -476,6 +483,7 @@ struct pci_dev { unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */ unsigned int rom_bar_overlap:1; /* ROM BAR disable broken */ unsigned int rom_attr_enabled:1; /* Display of ROM attribute enabled? */ + unsigned int non_mappable_bars:1; /* BARs can't be mapped to user-space */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ @@ -595,6 +603,8 @@ struct pci_host_bridge { u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */ int (*map_irq)(const struct pci_dev *, u8, u8); void (*release_fn)(struct pci_host_bridge *); + int (*enable_device)(struct pci_host_bridge *bridge, struct pci_dev *dev); + void (*disable_device)(struct pci_host_bridge *bridge, struct pci_dev *dev); void *release_data; unsigned int ignore_reset_delay:1; /* For entire hierarchy */ unsigned int no_ext_tags:1; /* No Extended Tags */ @@ -679,6 +689,7 @@ struct pci_bus { struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; unsigned int unsafe_warn:1; /* warned about RW1C config write */ + unsigned int flit_mode:1; /* Link in Flit mode */ }; #define to_pci_bus(n) container_of(n, struct pci_bus, dev) @@ -1047,6 +1058,20 @@ struct pci_driver { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0 /** + * PCI_VDEVICE_SUB - describe a specific PCI device/subdevice in a short form + * @vend: the vendor name + * @dev: the 16 bit PCI Device ID + * @subvend: the 16 bit PCI Subvendor ID + * @subdev: the 16 bit PCI Subdevice ID + * + * Generate the pci_device_id struct layout for the specific PCI + * device/subdevice. Private data may follow the output. + */ +#define PCI_VDEVICE_SUB(vend, dev, subvend, subdev) \ + .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ + .subvendor = (subvend), .subdevice = (subdev), 0, 0 + +/** * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form * @vend: the vendor name (without PCI_VENDOR_ID_ prefix) * @dev: the device name (without PCI_DEVICE_ID_<vend>_ prefix) @@ -1120,9 +1145,6 @@ resource_size_t pcibios_align_resource(void *, const struct resource *, resource_size_t, resource_size_t); -/* Weak but can be overridden by arch */ -void pci_fixup_cardbus(struct pci_bus *); - /* Generic PCI functions used internally */ void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region, @@ -1380,7 +1402,6 @@ void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); -int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); void pci_release_resource(struct pci_dev *dev, int resno); static inline int pci_rebar_bytes_to_size(u64 bytes) { @@ -1439,7 +1460,6 @@ void set_pcie_port_type(struct pci_dev *pdev); void set_pcie_hotplug_bridge(struct pci_dev *pdev); /* Functions for PCI Hotplug drivers to use */ -unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge); unsigned int pci_rescan_bus(struct pci_bus *bus); void pci_lock_rescan_remove(void); void pci_unlock_rescan_remove(void); @@ -1461,7 +1481,6 @@ void pci_assign_unassigned_resources(void); void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pci_assign_unassigned_bus_resources(struct pci_bus *bus); void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus); -int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type); int pci_enable_resources(struct pci_dev *, int mask); void pci_assign_irq(struct pci_dev *dev); struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res); @@ -1621,7 +1640,6 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); -void pci_setup_bridge(struct pci_bus *bus); resource_size_t pcibios_window_alignment(struct pci_bus *bus, unsigned long type); @@ -1654,7 +1672,7 @@ void pci_disable_msi(struct pci_dev *dev); int pci_msix_vec_count(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); -int pci_msi_enabled(void); +bool pci_msi_enabled(void); int pci_enable_msi(struct pci_dev *dev); int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec); @@ -1687,7 +1705,7 @@ static inline void pci_disable_msi(struct pci_dev *dev) { } static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; } static inline void pci_disable_msix(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } -static inline int pci_msi_enabled(void) { return 0; } +static inline bool pci_msi_enabled(void) { return false; } static inline int pci_enable_msi(struct pci_dev *dev) { return -ENOSYS; } static inline int pci_enable_msix_range(struct pci_dev *dev, @@ -1833,6 +1851,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif +#ifdef CONFIG_HOTPLUG_PCI +void pci_hp_ignore_link_change(struct pci_dev *pdev); +void pci_hp_unignore_link_change(struct pci_dev *pdev); +#else +static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { } +static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } +#endif + #ifdef CONFIG_PCIEAER bool pci_aer_available(void); #else @@ -1841,6 +1867,39 @@ static inline bool pci_aer_available(void) { return false; } bool pci_ats_disabled(void); +#define PCIE_PTM_CONTEXT_UPDATE_AUTO 0 +#define PCIE_PTM_CONTEXT_UPDATE_MANUAL 1 + +struct pcie_ptm_ops { + int (*check_capability)(void *drvdata); + int (*context_update_write)(void *drvdata, u8 mode); + int (*context_update_read)(void *drvdata, u8 *mode); + int (*context_valid_write)(void *drvdata, bool valid); + int (*context_valid_read)(void *drvdata, bool *valid); + int (*local_clock_read)(void *drvdata, u64 *clock); + int (*master_clock_read)(void *drvdata, u64 *clock); + int (*t1_read)(void *drvdata, u64 *clock); + int (*t2_read)(void *drvdata, u64 *clock); + int (*t3_read)(void *drvdata, u64 *clock); + int (*t4_read)(void *drvdata, u64 *clock); + + bool (*context_update_visible)(void *drvdata); + bool (*context_valid_visible)(void *drvdata); + bool (*local_clock_visible)(void *drvdata); + bool (*master_clock_visible)(void *drvdata); + bool (*t1_visible)(void *drvdata); + bool (*t2_visible)(void *drvdata); + bool (*t3_visible)(void *drvdata); + bool (*t4_visible)(void *drvdata); +}; + +struct pci_ptm_debugfs { + struct dentry *debugfs; + const struct pcie_ptm_ops *ops; + struct mutex lock; + void *pdata; +}; + #ifdef CONFIG_PCIE_PTM int pci_enable_ptm(struct pci_dev *dev, u8 *granularity); void pci_disable_ptm(struct pci_dev *dev); @@ -1853,6 +1912,18 @@ static inline bool pcie_ptm_enabled(struct pci_dev *dev) { return false; } #endif +#if IS_ENABLED(CONFIG_DEBUG_FS) && IS_ENABLED(CONFIG_PCIE_PTM) +struct pci_ptm_debugfs *pcie_ptm_create_debugfs(struct device *dev, void *pdata, + const struct pcie_ptm_ops *ops); +void pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs); +#else +static inline struct pci_ptm_debugfs +*pcie_ptm_create_debugfs(struct device *dev, void *pdata, + const struct pcie_ptm_ops *ops) { return NULL; } +static inline void +pcie_ptm_destroy_debugfs(struct pci_ptm_debugfs *ptm_debugfs) { } +#endif + void pci_cfg_access_lock(struct pci_dev *dev); bool pci_cfg_access_trylock(struct pci_dev *dev); void pci_cfg_access_unlock(struct pci_dev *dev); @@ -2307,7 +2378,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); @@ -2320,8 +2390,6 @@ extern int pci_pci_problems; #define PCIPCI_ALIMAGIK 32 /* Need low latency setting */ #define PCIAGP_FAIL 64 /* No PCI to AGP DMA */ -extern unsigned long pci_cardbus_io_size; -extern unsigned long pci_cardbus_mem_size; extern u8 pci_dfl_cache_line_size; extern u8 pci_cache_line_size; @@ -2431,11 +2499,6 @@ static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int res static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif -#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) -void pci_hp_create_module_link(struct pci_slot *pci_slot); -void pci_hp_remove_module_link(struct pci_slot *pci_slot); -#endif - /** * pci_pcie_cap - get the saved PCIe capability offset * @dev: PCI device @@ -2686,9 +2749,6 @@ void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #include <linux/dma-mapping.h> -#define pci_printk(level, pdev, fmt, arg...) \ - dev_printk(level, &(pdev)->dev, fmt, ##arg) - #define pci_emerg(pdev, fmt, arg...) dev_emerg(&(pdev)->dev, fmt, ##arg) #define pci_alert(pdev, fmt, arg...) dev_alert(&(pdev)->dev, fmt, ##arg) #define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg) diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 3a10d6ec3ee7..ec77ccf1fc4d 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -50,7 +50,6 @@ struct hotplug_slot_ops { /** * struct hotplug_slot - used to register a physical slot with the hotplug pci core * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot - * @slot_list: internal list used to track hotplug PCI slots * @pci_slot: represents a physical slot * @owner: The module owner of this structure * @mod_name: The module name (KBUILD_MODNAME) of this structure @@ -59,7 +58,6 @@ struct hotplug_slot { const struct hotplug_slot_ops *ops; /* Variables below this are for use only by the hotplug pci core. */ - struct list_head slot_list; struct pci_slot *pci_slot; struct module *owner; const char *mod_name; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index de5deb1a0118..e2d71b6fdd84 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -518,6 +518,7 @@ #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM 0x0251 #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361 #define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252 +#define PCI_DEVICE_ID_IBM_ISM 0x04ed #define PCI_SUBVENDOR_ID_IBM 0x1014 #define PCI_SUBDEVICE_ID_IBM_SATURN_SERIAL_ONE_PORT 0x03d4 @@ -569,6 +570,7 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 +#define PCI_DEVICE_ID_AMD_17H_M40H_DF_F3 0x13f3 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_17H_MA0H_DF_F3 0x1727 @@ -2609,6 +2611,8 @@ #define PCI_VENDOR_ID_ZHAOXIN 0x1d17 +#define PCI_VENDOR_ID_ROCKCHIP 0x1d87 + #define PCI_VENDOR_ID_HYGON 0x1d94 #define PCI_VENDOR_ID_META 0x1d9b @@ -3045,6 +3049,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_DG1 0x490d #define PCI_DEVICE_ID_INTEL_HDA_EHL_0 0x4b55 #define PCI_DEVICE_ID_INTEL_HDA_EHL_3 0x4b58 +#define PCI_DEVICE_ID_INTEL_HDA_WCL 0x4d28 #define PCI_DEVICE_ID_INTEL_HDA_JSL_N 0x4dc8 #define PCI_DEVICE_ID_INTEL_HDA_DG2_0 0x4f90 #define PCI_DEVICE_ID_INTEL_HDA_DG2_1 0x4f91 @@ -3066,6 +3071,7 @@ #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff +#define PCI_DEVICE_ID_INTEL_HDA_FCL 0x67a8 #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 @@ -3134,6 +3140,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 #define PCI_DEVICE_ID_INTEL_HDA_BMG 0xe2f7 +#define PCI_DEVICE_ID_INTEL_HDA_PTL_H 0xe328 #define PCI_DEVICE_ID_INTEL_HDA_PTL 0xe428 #define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 diff --git a/include/linux/pcie-dwc.h b/include/linux/pcie-dwc.h new file mode 100644 index 000000000000..8ff778e7aec0 --- /dev/null +++ b/include/linux/pcie-dwc.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021-2023 Alibaba Inc. + * Copyright (C) 2025 Linaro Ltd. + * + * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org> + */ + +#ifndef LINUX_PCIE_DWC_H +#define LINUX_PCIE_DWC_H + +#include <linux/pci_ids.h> + +struct dwc_pcie_vsec_id { + u16 vendor_id; + u16 vsec_id; + u8 vsec_rev; +}; + +/* + * VSEC IDs are allocated by the vendor, so a given ID may mean different + * things to different vendors. See PCIe r6.0, sec 7.9.5.2. + */ +static const struct dwc_pcie_vsec_id dwc_pcie_rasdes_vsec_ids[] = { + { .vendor_id = PCI_VENDOR_ID_ALIBABA, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_AMPERE, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_QCOM, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_ROCKCHIP, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + { .vendor_id = PCI_VENDOR_ID_SAMSUNG, + .vsec_id = 0x02, .vsec_rev = 0x4 }, + {} +}; + +#endif /* LINUX_PCIE_DWC_H */ diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index b5b5d17998b8..e40f554ff717 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -50,9 +50,7 @@ struct dw_xpcs; struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); -int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, - int enable); +void xpcs_config_eee_mult_fact(struct dw_xpcs *xpcs, u8 mult_fact); struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr); struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode); void xpcs_destroy(struct dw_xpcs *xpcs); diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index 4b4e9a98b37b..40ff0ec2b879 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -4,7 +4,7 @@ #ifndef _PDS_CORE_ADMINQ_H_ #define _PDS_CORE_ADMINQ_H_ -#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256 +#define PDSC_ADMINQ_MAX_POLL_INTERVAL 256000 /* usecs */ enum pds_core_adminq_flags { PDS_AQ_FLAG_FASTPOLL = BIT(1), /* completion poll at 1ms */ @@ -463,7 +463,6 @@ struct pds_core_lif_getattr_cmd { * @rsvd: Word boundary padding * @comp_index: Index in the descriptor ring for which this is the completion * @state: LIF state (enum pds_core_lif_state) - * @name: LIF name string, 0 terminated * @features: Features (enum pds_core_hw_features) * @rsvd2: Word boundary padding * @color: Color bit @@ -1179,6 +1178,274 @@ struct pds_lm_host_vf_status_cmd { u8 status; }; +enum pds_fwctl_cmd_opcode { + PDS_FWCTL_CMD_IDENT = 70, + PDS_FWCTL_CMD_RPC = 71, + PDS_FWCTL_CMD_QUERY = 72, +}; + +/** + * struct pds_fwctl_cmd - Firmware control command structure + * @opcode: Opcode + * @rsvd: Reserved + * @ep: Endpoint identifier + * @op: Operation identifier + */ +struct pds_fwctl_cmd { + u8 opcode; + u8 rsvd[3]; + __le32 ep; + __le32 op; +} __packed; + +/** + * struct pds_fwctl_comp - Firmware control completion structure + * @status: Status of the firmware control operation + * @rsvd: Reserved + * @comp_index: Completion index in little-endian format + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 rsvd2[11]; + u8 color; +} __packed; + +/** + * struct pds_fwctl_ident_cmd - Firmware control identification command structure + * @opcode: Operation code for the command + * @rsvd: Reserved + * @version: Interface version + * @rsvd2: Reserved + * @len: Length of the identification data + * @ident_pa: Physical address of the identification data + */ +struct pds_fwctl_ident_cmd { + u8 opcode; + u8 rsvd; + u8 version; + u8 rsvd2; + __le32 len; + __le64 ident_pa; +} __packed; + +/* future feature bits here + * enum pds_fwctl_features { + * }; + * (compilers don't like empty enums) + */ + +/** + * struct pds_fwctl_ident - Firmware control identification structure + * @features: Supported features (enum pds_fwctl_features) + * @version: Interface version + * @rsvd: Reserved + * @max_req_sz: Maximum request size + * @max_resp_sz: Maximum response size + * @max_req_sg_elems: Maximum number of request SGs + * @max_resp_sg_elems: Maximum number of response SGs + */ +struct pds_fwctl_ident { + __le64 features; + u8 version; + u8 rsvd[3]; + __le32 max_req_sz; + __le32 max_resp_sz; + u8 max_req_sg_elems; + u8 max_resp_sg_elems; +} __packed; + +enum pds_fwctl_query_entity { + PDS_FWCTL_RPC_ROOT = 0, + PDS_FWCTL_RPC_ENDPOINT = 1, + PDS_FWCTL_RPC_OPERATION = 2, +}; + +#define PDS_FWCTL_RPC_OPCODE_CMD_SHIFT 0 +#define PDS_FWCTL_RPC_OPCODE_CMD_MASK GENMASK(15, PDS_FWCTL_RPC_OPCODE_CMD_SHIFT) +#define PDS_FWCTL_RPC_OPCODE_VER_SHIFT 16 +#define PDS_FWCTL_RPC_OPCODE_VER_MASK GENMASK(23, PDS_FWCTL_RPC_OPCODE_VER_SHIFT) + +#define PDS_FWCTL_RPC_OPCODE_GET_CMD(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_CMD_MASK, op) +#define PDS_FWCTL_RPC_OPCODE_GET_VER(op) FIELD_GET(PDS_FWCTL_RPC_OPCODE_VER_MASK, op) + +#define PDS_FWCTL_RPC_OPCODE_CMP(op1, op2) \ + (PDS_FWCTL_RPC_OPCODE_GET_CMD(op1) == PDS_FWCTL_RPC_OPCODE_GET_CMD(op2) && \ + PDS_FWCTL_RPC_OPCODE_GET_VER(op1) <= PDS_FWCTL_RPC_OPCODE_GET_VER(op2)) + +/* + * FW command attributes that map to the FWCTL scope values + */ +#define PDSFC_FW_CMD_ATTR_READ 0x00 +#define PDSFC_FW_CMD_ATTR_DEBUG_READ 0x02 +#define PDSFC_FW_CMD_ATTR_WRITE 0x04 +#define PDSFC_FW_CMD_ATTR_DEBUG_WRITE 0x08 +#define PDSFC_FW_CMD_ATTR_SYNC 0x10 + +/** + * struct pds_fwctl_query_cmd - Firmware control query command structure + * @opcode: Operation code for the command + * @entity: Entity type to query (enum pds_fwctl_query_entity) + * @version: Version of the query data structure supported by the driver + * @rsvd: Reserved + * @query_data_buf_len: Length of the query data buffer + * @query_data_buf_pa: Physical address of the query data buffer + * @ep: Endpoint identifier to query (when entity is PDS_FWCTL_RPC_ENDPOINT) + * @op: Operation identifier to query (when entity is PDS_FWCTL_RPC_OPERATION) + * + * This structure is used to send a query command to the firmware control + * interface. The structure is packed to ensure there is no padding between + * the fields. + */ +struct pds_fwctl_query_cmd { + u8 opcode; + u8 entity; + u8 version; + u8 rsvd; + __le32 query_data_buf_len; + __le64 query_data_buf_pa; + union { + __le32 ep; + __le32 op; + }; +} __packed; + +/** + * struct pds_fwctl_query_comp - Firmware control query completion structure + * @status: Status of the query command + * @rsvd: Reserved + * @comp_index: Completion index in little-endian format + * @version: Version of the query data structure returned by firmware. This + * should be less than or equal to the version supported by the driver + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_query_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + u8 version; + u8 rsvd2[2]; + u8 color; +} __packed; + +/** + * struct pds_fwctl_query_data_endpoint - query data for entity PDS_FWCTL_RPC_ROOT + * @id: The identifier for the data endpoint + */ +struct pds_fwctl_query_data_endpoint { + __le32 id; +} __packed; + +/** + * struct pds_fwctl_query_data_operation - query data for entity PDS_FWCTL_RPC_ENDPOINT + * @id: Operation identifier + * @scope: Scope of the operation (enum fwctl_rpc_scope) + * @rsvd: Reserved + */ +struct pds_fwctl_query_data_operation { + __le32 id; + u8 scope; + u8 rsvd[3]; +} __packed; + +/** + * struct pds_fwctl_query_data - query data structure + * @version: Version of the query data structure + * @rsvd: Reserved + * @num_entries: Number of entries in the union + * @entries: Array of query data entries, depending on the entity type + */ +struct pds_fwctl_query_data { + u8 version; + u8 rsvd[3]; + __le32 num_entries; + u8 entries[] __counted_by_le(num_entries); +} __packed; + +/** + * struct pds_fwctl_rpc_cmd - Firmware control RPC command + * @opcode: opcode PDS_FWCTL_CMD_RPC + * @rsvd: Reserved + * @flags: Indicates indirect request and/or response handling + * @ep: Endpoint identifier + * @op: Operation identifier + * @inline_req0: Buffer for inline request + * @inline_req1: Buffer for inline request + * @req_pa: Physical address of request data + * @req_sz: Size of the request + * @req_sg_elems: Number of request SGs + * @req_rsvd: Reserved + * @inline_req2: Buffer for inline request + * @resp_pa: Physical address of response data + * @resp_sz: Size of the response + * @resp_sg_elems: Number of response SGs + * @resp_rsvd: Reserved + */ +struct pds_fwctl_rpc_cmd { + u8 opcode; + u8 rsvd; + __le16 flags; +#define PDS_FWCTL_RPC_IND_REQ 0x1 +#define PDS_FWCTL_RPC_IND_RESP 0x2 + __le32 ep; + __le32 op; + u8 inline_req0[16]; + union { + u8 inline_req1[16]; + struct { + __le64 req_pa; + __le32 req_sz; + u8 req_sg_elems; + u8 req_rsvd[3]; + }; + }; + union { + u8 inline_req2[16]; + struct { + __le64 resp_pa; + __le32 resp_sz; + u8 resp_sg_elems; + u8 resp_rsvd[3]; + }; + }; +} __packed; + +/** + * struct pds_sg_elem - Transmit scatter-gather (SG) descriptor element + * @addr: DMA address of SG element data buffer + * @len: Length of SG element data buffer, in bytes + * @rsvd: Reserved + */ +struct pds_sg_elem { + __le64 addr; + __le32 len; + u8 rsvd[4]; +} __packed; + +/** + * struct pds_fwctl_rpc_comp - Completion of a firmware control RPC + * @status: Status of the command + * @rsvd: Reserved + * @comp_index: Completion index of the command + * @err: Error code, if any, from the RPC + * @resp_sz: Size of the response + * @rsvd2: Reserved + * @color: Color bit indicating the state of the completion + */ +struct pds_fwctl_rpc_comp { + u8 status; + u8 rsvd; + __le16 comp_index; + __le32 err; + __le32 resp_sz; + u8 rsvd2[3]; + u8 color; +} __packed; + union pds_core_adminq_cmd { u8 opcode; u8 bytes[64]; @@ -1216,6 +1483,11 @@ union pds_core_adminq_cmd { struct pds_lm_dirty_enable_cmd lm_dirty_enable; struct pds_lm_dirty_disable_cmd lm_dirty_disable; struct pds_lm_dirty_seq_ack_cmd lm_dirty_seq_ack; + + struct pds_fwctl_cmd fwctl; + struct pds_fwctl_ident_cmd fwctl_ident; + struct pds_fwctl_rpc_cmd fwctl_rpc; + struct pds_fwctl_query_cmd fwctl_query; }; union pds_core_adminq_comp { @@ -1243,6 +1515,10 @@ union pds_core_adminq_comp { struct pds_lm_state_size_comp lm_state_size; struct pds_lm_dirty_status_comp lm_dirty_status; + + struct pds_fwctl_comp fwctl; + struct pds_fwctl_rpc_comp fwctl_rpc; + struct pds_fwctl_query_comp fwctl_query; }; #ifndef __CHECKER__ diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 5802e1deef24..b193adbe7cc3 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -29,6 +29,7 @@ enum pds_core_vif_types { PDS_DEV_TYPE_ETH = 3, PDS_DEV_TYPE_RDMA = 4, PDS_DEV_TYPE_LM = 5, + PDS_DEV_TYPE_FWCTL = 6, /* new ones added before this line */ PDS_DEV_TYPE_MAX = 16 /* don't change - used in struct size */ @@ -40,6 +41,7 @@ enum pds_core_vif_types { #define PDS_DEV_TYPE_ETH_STR "Eth" #define PDS_DEV_TYPE_RDMA_STR "RDMA" #define PDS_DEV_TYPE_LM_STR "LM" +#define PDS_DEV_TYPE_FWCTL_STR "fwctl" #define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR #define PDS_VFIO_LM_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_LM_STR "." PDS_DEV_TYPE_VFIO_STR diff --git a/include/linux/pe.h b/include/linux/pe.h index fdf9c95709ba..cd2b7275385f 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -39,113 +39,160 @@ */ #define LINUX_PE_MAGIC 0x818223cd -#define MZ_MAGIC 0x5a4d /* "MZ" */ +#define IMAGE_DOS_SIGNATURE 0x5a4d /* "MZ" */ -#define PE_MAGIC 0x00004550 /* "PE\0\0" */ -#define PE_OPT_MAGIC_PE32 0x010b -#define PE_OPT_MAGIC_PE32_ROM 0x0107 -#define PE_OPT_MAGIC_PE32PLUS 0x020b +#define IMAGE_NT_SIGNATURE 0x00004550 /* "PE\0\0" */ + +#define IMAGE_ROM_OPTIONAL_HDR_MAGIC 0x0107 /* ROM image (for R3000/R4000/R10000/ALPHA), without MZ and PE\0\0 sign */ +#define IMAGE_NT_OPTIONAL_HDR32_MAGIC 0x010b /* PE32 executable image */ +#define IMAGE_NT_OPTIONAL_HDR64_MAGIC 0x020b /* PE32+ executable image */ /* machine type */ -#define IMAGE_FILE_MACHINE_UNKNOWN 0x0000 -#define IMAGE_FILE_MACHINE_AM33 0x01d3 -#define IMAGE_FILE_MACHINE_AMD64 0x8664 -#define IMAGE_FILE_MACHINE_ARM 0x01c0 -#define IMAGE_FILE_MACHINE_ARMV7 0x01c4 -#define IMAGE_FILE_MACHINE_ARM64 0xaa64 -#define IMAGE_FILE_MACHINE_EBC 0x0ebc -#define IMAGE_FILE_MACHINE_I386 0x014c -#define IMAGE_FILE_MACHINE_IA64 0x0200 -#define IMAGE_FILE_MACHINE_M32R 0x9041 -#define IMAGE_FILE_MACHINE_MIPS16 0x0266 -#define IMAGE_FILE_MACHINE_MIPSFPU 0x0366 -#define IMAGE_FILE_MACHINE_MIPSFPU16 0x0466 -#define IMAGE_FILE_MACHINE_POWERPC 0x01f0 -#define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1 -#define IMAGE_FILE_MACHINE_R4000 0x0166 -#define IMAGE_FILE_MACHINE_RISCV32 0x5032 -#define IMAGE_FILE_MACHINE_RISCV64 0x5064 -#define IMAGE_FILE_MACHINE_RISCV128 0x5128 -#define IMAGE_FILE_MACHINE_SH3 0x01a2 -#define IMAGE_FILE_MACHINE_SH3DSP 0x01a3 -#define IMAGE_FILE_MACHINE_SH3E 0x01a4 -#define IMAGE_FILE_MACHINE_SH4 0x01a6 -#define IMAGE_FILE_MACHINE_SH5 0x01a8 -#define IMAGE_FILE_MACHINE_THUMB 0x01c2 -#define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 -#define IMAGE_FILE_MACHINE_LOONGARCH32 0x6232 -#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 +#define IMAGE_FILE_MACHINE_UNKNOWN 0x0000 /* Unknown architecture */ +#define IMAGE_FILE_MACHINE_TARGET_HOST 0x0001 /* Interacts with the host and not a WOW64 guest (not for file image) */ +#define IMAGE_FILE_MACHINE_ALPHA_OLD 0x0183 /* DEC Alpha AXP 32-bit (old images) */ +#define IMAGE_FILE_MACHINE_ALPHA 0x0184 /* DEC Alpha AXP 32-bit */ +#define IMAGE_FILE_MACHINE_ALPHA64 0x0284 /* DEC Alpha AXP 64-bit (with 8kB page size) */ +#define IMAGE_FILE_MACHINE_AXP64 IMAGE_FILE_MACHINE_ALPHA64 +#define IMAGE_FILE_MACHINE_AM33 0x01d3 /* Matsushita AM33, now Panasonic MN103 */ +#define IMAGE_FILE_MACHINE_AMD64 0x8664 /* AMD64 (x64) */ +#define IMAGE_FILE_MACHINE_ARM 0x01c0 /* ARM Little-Endian (ARMv4) */ +#define IMAGE_FILE_MACHINE_THUMB 0x01c2 /* ARM Thumb Little-Endian (ARMv4T) */ +#define IMAGE_FILE_MACHINE_ARMNT 0x01c4 /* ARM Thumb-2 Little-Endian (ARMv7) */ +#define IMAGE_FILE_MACHINE_ARMV7 IMAGE_FILE_MACHINE_ARMNT +#define IMAGE_FILE_MACHINE_ARM64 0xaa64 /* ARM64 Little-Endian (Classic ABI) */ +#define IMAGE_FILE_MACHINE_ARM64EC 0xa641 /* ARM64 Little-Endian (Emulation Compatible ABI for AMD64) */ +#define IMAGE_FILE_MACHINE_ARM64X 0xa64e /* ARM64 Little-Endian (fat binary with both Classic ABI and EC ABI code) */ +#define IMAGE_FILE_MACHINE_CEE 0xc0ee /* COM+ Execution Engine (CLR pure MSIL object files) */ +#define IMAGE_FILE_MACHINE_CEF 0x0cef /* Windows CE 3.0 Common Executable Format (CEF bytecode) */ +#define IMAGE_FILE_MACHINE_CHPE_X86 0x3a64 /* ARM64 Little-Endian (Compiled Hybrid PE ABI for I386) */ +#define IMAGE_FILE_MACHINE_HYBRID_X86 IMAGE_FILE_MACHINE_CHPE_X86 +#define IMAGE_FILE_MACHINE_EBC 0x0ebc /* EFI/UEFI Byte Code */ +#define IMAGE_FILE_MACHINE_I386 0x014c /* Intel 386 (x86) */ +#define IMAGE_FILE_MACHINE_I860 0x014d /* Intel 860 (N10) */ +#define IMAGE_FILE_MACHINE_IA64 0x0200 /* Intel IA-64 (with 8kB page size) */ +#define IMAGE_FILE_MACHINE_LOONGARCH32 0x6232 /* LoongArch 32-bit processor family */ +#define IMAGE_FILE_MACHINE_LOONGARCH64 0x6264 /* LoongArch 64-bit processor family */ +#define IMAGE_FILE_MACHINE_M32R 0x9041 /* Mitsubishi M32R 32-bit Little-Endian */ +#define IMAGE_FILE_MACHINE_M68K 0x0268 /* Motorola 68000 series */ +#define IMAGE_FILE_MACHINE_MIPS16 0x0266 /* MIPS III with MIPS16 ASE Little-Endian */ +#define IMAGE_FILE_MACHINE_MIPSFPU 0x0366 /* MIPS III with FPU Little-Endian */ +#define IMAGE_FILE_MACHINE_MIPSFPU16 0x0466 /* MIPS III with MIPS16 ASE and FPU Little-Endian */ +#define IMAGE_FILE_MACHINE_MPPC_601 0x0601 /* PowerPC 32-bit Big-Endian */ +#define IMAGE_FILE_MACHINE_OMNI 0xace1 /* Microsoft OMNI VM (omniprox.dll) */ +#define IMAGE_FILE_MACHINE_PARISC 0x0290 /* HP PA-RISC */ +#define IMAGE_FILE_MACHINE_POWERPC 0x01f0 /* PowerPC 32-bit Little-Endian */ +#define IMAGE_FILE_MACHINE_POWERPCFP 0x01f1 /* PowerPC 32-bit with FPU Little-Endian */ +#define IMAGE_FILE_MACHINE_POWERPCBE 0x01f2 /* PowerPC 64-bit Big-Endian */ +#define IMAGE_FILE_MACHINE_R3000 0x0162 /* MIPS I Little-Endian */ +#define IMAGE_FILE_MACHINE_R3000_BE 0x0160 /* MIPS I Big-Endian */ +#define IMAGE_FILE_MACHINE_R4000 0x0166 /* MIPS III Little-Endian (with 1kB or 4kB page size) */ +#define IMAGE_FILE_MACHINE_R10000 0x0168 /* MIPS IV Little-Endian */ +#define IMAGE_FILE_MACHINE_RISCV32 0x5032 /* RISC-V 32-bit address space */ +#define IMAGE_FILE_MACHINE_RISCV64 0x5064 /* RISC-V 64-bit address space */ +#define IMAGE_FILE_MACHINE_RISCV128 0x5128 /* RISC-V 128-bit address space */ +#define IMAGE_FILE_MACHINE_SH3 0x01a2 /* Hitachi SH-3 32-bit Little-Endian (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH3DSP 0x01a3 /* Hitachi SH-3 DSP 32-bit (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH3E 0x01a4 /* Hitachi SH-3E Little-Endian (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH4 0x01a6 /* Hitachi SH-4 32-bit Little-Endian (with 1kB page size) */ +#define IMAGE_FILE_MACHINE_SH5 0x01a8 /* Hitachi SH-5 64-bit */ +#define IMAGE_FILE_MACHINE_TAHOE 0x07cc /* Intel EM machine */ +#define IMAGE_FILE_MACHINE_TRICORE 0x0520 /* Infineon AUDO 32-bit */ +#define IMAGE_FILE_MACHINE_WCEMIPSV2 0x0169 /* MIPS Windows CE v2 Little-Endian */ /* flags */ -#define IMAGE_FILE_RELOCS_STRIPPED 0x0001 -#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 -#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004 -#define IMAGE_FILE_LOCAL_SYMS_STRIPPED 0x0008 -#define IMAGE_FILE_AGGRESSIVE_WS_TRIM 0x0010 -#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020 -#define IMAGE_FILE_16BIT_MACHINE 0x0040 -#define IMAGE_FILE_BYTES_REVERSED_LO 0x0080 -#define IMAGE_FILE_32BIT_MACHINE 0x0100 -#define IMAGE_FILE_DEBUG_STRIPPED 0x0200 -#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP 0x0400 -#define IMAGE_FILE_NET_RUN_FROM_SWAP 0x0800 -#define IMAGE_FILE_SYSTEM 0x1000 -#define IMAGE_FILE_DLL 0x2000 -#define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 -#define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 - -#define IMAGE_FILE_OPT_ROM_MAGIC 0x107 -#define IMAGE_FILE_OPT_PE32_MAGIC 0x10b -#define IMAGE_FILE_OPT_PE32_PLUS_MAGIC 0x20b - -#define IMAGE_SUBSYSTEM_UNKNOWN 0 -#define IMAGE_SUBSYSTEM_NATIVE 1 -#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2 -#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 -#define IMAGE_SUBSYSTEM_POSIX_CUI 7 -#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI 9 -#define IMAGE_SUBSYSTEM_EFI_APPLICATION 10 -#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER 11 -#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER 12 -#define IMAGE_SUBSYSTEM_EFI_ROM_IMAGE 13 -#define IMAGE_SUBSYSTEM_XBOX 14 - -#define IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE 0x0040 -#define IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY 0x0080 -#define IMAGE_DLL_CHARACTERISTICS_NX_COMPAT 0x0100 -#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION 0x0200 -#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400 -#define IMAGE_DLLCHARACTERISTICS_NO_BIND 0x0800 -#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 -#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 - -#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT 0x0001 -#define IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT 0x0040 - -/* they actually defined 0x00000000 as well, but I think we'll skip that one. */ -#define IMAGE_SCN_RESERVED_0 0x00000001 -#define IMAGE_SCN_RESERVED_1 0x00000002 -#define IMAGE_SCN_RESERVED_2 0x00000004 -#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* don't pad - obsolete */ -#define IMAGE_SCN_RESERVED_3 0x00000010 +#define IMAGE_FILE_RELOCS_STRIPPED 0x0001 /* Relocation info stripped from file */ +#define IMAGE_FILE_EXECUTABLE_IMAGE 0x0002 /* File is executable (i.e. no unresolved external references) */ +#define IMAGE_FILE_LINE_NUMS_STRIPPED 0x0004 /* Line nunbers stripped from file */ +#define IMAGE_FILE_LOCAL_SYMS_STRIPPED 0x0008 /* Local symbols stripped from file */ +#define IMAGE_FILE_AGGRESSIVE_WS_TRIM 0x0010 /* Aggressively trim working set */ +#define IMAGE_FILE_LARGE_ADDRESS_AWARE 0x0020 /* App can handle >2gb addresses (image can be loaded at address above 2GB) */ +#define IMAGE_FILE_16BIT_MACHINE 0x0040 /* 16 bit word machine */ +#define IMAGE_FILE_BYTES_REVERSED_LO 0x0080 /* Bytes of machine word are reversed (should be set together with IMAGE_FILE_BYTES_REVERSED_HI) */ +#define IMAGE_FILE_32BIT_MACHINE 0x0100 /* 32 bit word machine */ +#define IMAGE_FILE_DEBUG_STRIPPED 0x0200 /* Debugging info stripped from file in .DBG file */ +#define IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP 0x0400 /* If Image is on removable media, copy and run from the swap file */ +#define IMAGE_FILE_NET_RUN_FROM_SWAP 0x0800 /* If Image is on Net, copy and run from the swap file */ +#define IMAGE_FILE_SYSTEM 0x1000 /* System kernel-mode file (can't be loaded in user-mode) */ +#define IMAGE_FILE_DLL 0x2000 /* File is a DLL */ +#define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 /* File should only be run on a UP (uniprocessor) machine */ +#define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 /* Bytes of machine word are reversed (should be set together with IMAGE_FILE_BYTES_REVERSED_LO) */ + +/* subsys */ +#define IMAGE_SUBSYSTEM_UNKNOWN 0 /* Unknown subsystem */ +#define IMAGE_SUBSYSTEM_NATIVE 1 /* No subsystem required (NT device drivers and NT native system processes) */ +#define IMAGE_SUBSYSTEM_WINDOWS_GUI 2 /* Windows graphical user interface (GUI) subsystem */ +#define IMAGE_SUBSYSTEM_WINDOWS_CUI 3 /* Windows character-mode user interface (CUI) subsystem */ +#define IMAGE_SUBSYSTEM_WINDOWS_OLD_CE_GUI 4 /* Old Windows CE subsystem */ +#define IMAGE_SUBSYSTEM_OS2_CUI 5 /* OS/2 CUI subsystem */ +#define IMAGE_SUBSYSTEM_RESERVED_6 6 +#define IMAGE_SUBSYSTEM_POSIX_CUI 7 /* POSIX CUI subsystem */ +#define IMAGE_SUBSYSTEM_MMOSA 8 /* MMOSA/Native Win32E */ +#define IMAGE_SUBSYSTEM_WINDOWS_CE_GUI 9 /* Windows CE subsystem */ +#define IMAGE_SUBSYSTEM_EFI_APPLICATION 10 /* Extensible Firmware Interface (EFI) application */ +#define IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER 11 /* EFI driver with boot services */ +#define IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER 12 /* EFI driver with run-time services */ +#define IMAGE_SUBSYSTEM_EFI_ROM_IMAGE 13 /* EFI ROM image */ +#define IMAGE_SUBSYSTEM_XBOX 14 /* Xbox system */ +#define IMAGE_SUBSYSTEM_RESERVED_15 15 +#define IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION 16 /* Windows Boot application */ +#define IMAGE_SUBSYSTEM_XBOX_CODE_CATALOG 17 /* Xbox Code Catalog */ + +/* dll_flags */ +#define IMAGE_LIBRARY_PROCESS_INIT 0x0001 /* DLL initialization function called just after process initialization */ +#define IMAGE_LIBRARY_PROCESS_TERM 0x0002 /* DLL initialization function called just before process termination */ +#define IMAGE_LIBRARY_THREAD_INIT 0x0004 /* DLL initialization function called just after thread initialization */ +#define IMAGE_LIBRARY_THREAD_TERM 0x0008 /* DLL initialization function called just before thread initialization */ +#define IMAGE_DLLCHARACTERISTICS_RESERVED_4 0x0010 +#define IMAGE_DLLCHARACTERISTICS_HIGH_ENTROPY_VA 0x0020 /* ASLR with 64 bit address space (image can be loaded at address above 4GB) */ +#define IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE 0x0040 /* The DLL can be relocated at load time */ +#define IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY 0x0080 /* Code integrity checks are forced */ +#define IMAGE_DLLCHARACTERISTICS_NX_COMPAT 0x0100 /* Image is compatible with data execution prevention */ +#define IMAGE_DLLCHARACTERISTICS_NO_ISOLATION 0x0200 /* Image is isolation aware, but should not be isolated (prevents loading of manifest file) */ +#define IMAGE_DLLCHARACTERISTICS_NO_SEH 0x0400 /* Image does not use SEH, no SE handler may reside in this image */ +#define IMAGE_DLLCHARACTERISTICS_NO_BIND 0x0800 /* Do not bind the image */ +#define IMAGE_DLLCHARACTERISTICS_X86_THUNK 0x1000 /* Image is a Wx86 Thunk DLL (for non-x86/risc DLL files) */ +#define IMAGE_DLLCHARACTERISTICS_APPCONTAINER 0x1000 /* Image should execute in an AppContainer (for EXE Metro Apps in Windows 8) */ +#define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 /* A WDM driver */ +#define IMAGE_DLLCHARACTERISTICS_GUARD_CF 0x4000 /* Image supports Control Flow Guard */ +#define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 /* The image is terminal server (Remote Desktop Services) aware */ + +/* IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS flags */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT 0x0001 /* Image is Control-flow Enforcement Technology Shadow Stack compatible */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE 0x0002 /* CET is enforced in strict mode */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE 0x0004 /* Relaxed mode for Context IP Validation under CET is allowed */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC 0x0008 /* Use of dynamic APIs is restricted to processes only */ +#define IMAGE_DLLCHARACTERISTICS_EX_CET_RESERVED_1 0x0010 +#define IMAGE_DLLCHARACTERISTICS_EX_CET_RESERVED_2 0x0020 +#define IMAGE_DLLCHARACTERISTICS_EX_FORWARD_CFI_COMPAT 0x0040 /* All branch targets in all image code sections are annotated with forward-edge control flow integrity guard instructions */ +#define IMAGE_DLLCHARACTERISTICS_EX_HOTPATCH_COMPATIBLE 0x0080 /* Image can be modified while in use, hotpatch-compatible */ + +/* section_header flags */ +#define IMAGE_SCN_SCALE_INDEX 0x00000001 /* address of tls index is scaled = multiplied by 4 (for .tls section on MIPS only) */ +#define IMAGE_SCN_TYPE_NO_LOAD 0x00000002 /* reserved */ +#define IMAGE_SCN_TYPE_GROUPED 0x00000004 /* obsolete (used for 16-bit offset code) */ +#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* .o only - don't pad - obsolete (same as IMAGE_SCN_ALIGN_1BYTES) */ +#define IMAGE_SCN_TYPE_COPY 0x00000010 /* reserved */ #define IMAGE_SCN_CNT_CODE 0x00000020 /* .text */ #define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */ #define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */ -#define IMAGE_SCN_LNK_OTHER 0x00000100 /* reserved */ -#define IMAGE_SCN_LNK_INFO 0x00000200 /* .drectve comments */ -#define IMAGE_SCN_RESERVED_4 0x00000400 +#define IMAGE_SCN_LNK_OTHER 0x00000100 /* .o only - other type than code, data or info */ +#define IMAGE_SCN_LNK_INFO 0x00000200 /* .o only - .drectve comments */ +#define IMAGE_SCN_LNK_OVERLAY 0x00000400 /* section contains overlay */ #define IMAGE_SCN_LNK_REMOVE 0x00000800 /* .o only - scn to be rm'd*/ #define IMAGE_SCN_LNK_COMDAT 0x00001000 /* .o only - COMDAT data */ -#define IMAGE_SCN_RESERVED_5 0x00002000 /* spec omits this */ -#define IMAGE_SCN_RESERVED_6 0x00004000 /* spec omits this */ -#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data */ -/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */ -#define IMAGE_SCN_MEM_PURGEABLE 0x00010000 /* reserved for "future" use */ -#define IMAGE_SCN_16BIT 0x00020000 /* reserved for "future" use */ -#define IMAGE_SCN_LOCKED 0x00040000 /* reserved for "future" use */ -#define IMAGE_SCN_PRELOAD 0x00080000 /* reserved for "future" use */ +#define IMAGE_SCN_RESERVED_13 0x00002000 /* spec omits this */ +#define IMAGE_SCN_MEM_PROTECTED 0x00004000 /* section is memory protected (for M68K) */ +#define IMAGE_SCN_NO_DEFER_SPEC_EXC 0x00004000 /* reset speculative exceptions handling bits in the TLB entries (for non-M68K) */ +#define IMAGE_SCN_MEM_FARDATA 0x00008000 /* section uses FAR_EXTERNAL relocations (for M68K) */ +#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data (for non-M68K) */ +#define IMAGE_SCN_MEM_SYSHEAP 0x00010000 /* use system heap (for M68K) */ +#define IMAGE_SCN_MEM_PURGEABLE 0x00020000 /* section can be released from RAM (for M68K) */ +#define IMAGE_SCN_MEM_16BIT 0x00020000 /* section is 16-bit (for non-M68K where it makes sense: I386, THUMB, MIPS16, MIPSFPU16, ...) */ +#define IMAGE_SCN_MEM_LOCKED 0x00040000 /* prevent the section from being moved (for M68K and .o I386) */ +#define IMAGE_SCN_MEM_PRELOAD 0x00080000 /* section is preload to RAM (for M68K and .o I386) */ /* and here they just stuck a 1-byte integer in the middle of a bitfield */ -#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* it does what it says on the box */ +#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* .o only - it does what it says on the box */ #define IMAGE_SCN_ALIGN_2BYTES 0x00200000 #define IMAGE_SCN_ALIGN_4BYTES 0x00300000 #define IMAGE_SCN_ALIGN_8BYTES 0x00400000 @@ -159,7 +206,9 @@ #define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000 #define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000 #define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000 -#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */ +#define IMAGE_SCN_ALIGN_RESERVED 0x00f00000 +#define IMAGE_SCN_ALIGN_MASK 0x00f00000 +#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* .o only - extended relocations */ #define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */ #define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */ #define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 /* not pageable */ @@ -168,8 +217,28 @@ #define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ #define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ -#define IMAGE_DEBUG_TYPE_CODEVIEW 2 -#define IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS 20 +#define IMAGE_DEBUG_TYPE_UNKNOWN 0 /* Unknown value, ignored by all tools */ +#define IMAGE_DEBUG_TYPE_COFF 1 /* COFF debugging information */ +#define IMAGE_DEBUG_TYPE_CODEVIEW 2 /* CodeView debugging information or Visual C++ Program Database debugging information */ +#define IMAGE_DEBUG_TYPE_FPO 3 /* Frame pointer omission (FPO) information */ +#define IMAGE_DEBUG_TYPE_MISC 4 /* Location of DBG file with CodeView debugging information */ +#define IMAGE_DEBUG_TYPE_EXCEPTION 5 /* Exception information, copy of .pdata section */ +#define IMAGE_DEBUG_TYPE_FIXUP 6 /* Fixup information */ +#define IMAGE_DEBUG_TYPE_OMAP_TO_SRC 7 /* The mapping from an RVA in image to an RVA in source image */ +#define IMAGE_DEBUG_TYPE_OMAP_FROM_SRC 8 /* The mapping from an RVA in source image to an RVA in image */ +#define IMAGE_DEBUG_TYPE_BORLAND 9 /* Borland debugging information */ +#define IMAGE_DEBUG_TYPE_RESERVED10 10 /* Coldpath / Hotpatch debug information */ +#define IMAGE_DEBUG_TYPE_CLSID 11 /* CLSID */ +#define IMAGE_DEBUG_TYPE_VC_FEATURE 12 /* Visual C++ counts / statistics */ +#define IMAGE_DEBUG_TYPE_POGO 13 /* COFF group information, data for profile-guided optimization */ +#define IMAGE_DEBUG_TYPE_ILTCG 14 /* Incremental link-time code generation */ +#define IMAGE_DEBUG_TYPE_MPX 15 /* Intel Memory Protection Extensions */ +#define IMAGE_DEBUG_TYPE_REPRO 16 /* PE determinism or reproducibility */ +#define IMAGE_DEBUG_TYPE_EMBEDDED_PORTABLE_PDB 17 /* Embedded Portable PDB debugging information */ +#define IMAGE_DEBUG_TYPE_SPGO 18 /* Sample profile-guided optimization */ +#define IMAGE_DEBUG_TYPE_PDBCHECKSUM 19 /* PDB Checksum */ +#define IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS 20 /* Extended DLL characteristics bits */ +#define IMAGE_DEBUG_TYPE_PERFMAP 21 /* Location of associated Ready To Run PerfMap file */ #ifndef __ASSEMBLY__ @@ -235,7 +304,7 @@ struct pe32_opt_hdr { uint16_t image_minor; /* minor image version */ uint16_t subsys_major; /* major subsystem version */ uint16_t subsys_minor; /* minor subsystem version */ - uint32_t win32_version; /* reserved, must be 0 */ + uint32_t win32_version; /* win32 version reported at runtime */ uint32_t image_size; /* image size */ uint32_t header_size; /* header size rounded up to file_align */ @@ -246,7 +315,7 @@ struct pe32_opt_hdr { uint32_t stack_size; /* amt of stack required */ uint32_t heap_size_req; /* amt of heap requested */ uint32_t heap_size; /* amt of heap required */ - uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t loader_flags; /* loader flags */ uint32_t data_dirs; /* number of data dir entries */ }; @@ -269,7 +338,7 @@ struct pe32plus_opt_hdr { uint16_t image_minor; /* minor image version */ uint16_t subsys_major; /* major subsystem version */ uint16_t subsys_minor; /* minor subsystem version */ - uint32_t win32_version; /* reserved, must be 0 */ + uint32_t win32_version; /* win32 version reported at runtime */ uint32_t image_size; /* image size */ uint32_t header_size; /* header size rounded up to file_align */ @@ -280,7 +349,7 @@ struct pe32plus_opt_hdr { uint64_t stack_size; /* amt of stack required */ uint64_t heap_size_req; /* amt of heap requested */ uint64_t heap_size; /* amt of heap required */ - uint32_t loader_flags; /* reserved, must be 0 */ + uint32_t loader_flags; /* loader flags */ uint32_t data_dirs; /* number of data dir entries */ }; @@ -301,10 +370,10 @@ struct data_directory { struct data_dirent global_ptr; /* global pointer reg. Size=0 */ struct data_dirent tls; /* .tls */ struct data_dirent load_config; /* load configuration structure */ - struct data_dirent bound_imports; /* no idea */ + struct data_dirent bound_imports; /* bound import table */ struct data_dirent import_addrs; /* import address table */ struct data_dirent delay_imports; /* delay-load import table */ - struct data_dirent clr_runtime_hdr; /* .cor (object only) */ + struct data_dirent clr_runtime_hdr; /* .cor (clr/.net executables) */ struct data_dirent reserved; }; diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 5b520fe86b60..c16cdeaa505e 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -26,13 +26,11 @@ #define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" #endif -#define PER_CPU_FIRST_SECTION "..first" #else #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_FIRST_SECTION "" #endif @@ -115,14 +113,17 @@ DEFINE_PER_CPU_SECTION(type, name, "") /* - * Declaration/definition used for per-CPU variables that must come first in - * the set of variables. + * Declaration/definition used for per-CPU variables that are frequently + * accessed and should be in a single cacheline. + * + * For use only by architecture and core code. Only use scalar or pointer + * types to maximize density. */ -#define DECLARE_PER_CPU_FIRST(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) +#define DECLARE_PER_CPU_CACHE_HOT(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..hot.." #name) -#define DEFINE_PER_CPU_FIRST(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) +#define DEFINE_PER_CPU_CACHE_HOT(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..hot.." #name) /* * Declaration/definition used for per-CPU variables that must be cacheline @@ -221,7 +222,7 @@ do { \ } while (0) #define PERCPU_PTR(__p) \ - (typeof(*(__p)) __force __kernel *)((__force unsigned long)(__p)) + (TYPEOF_UNQUAL(*(__p)) __force __kernel *)((__force unsigned long)(__p)) #ifdef CONFIG_SMP @@ -317,7 +318,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_size_call_return(stem, variable) \ ({ \ - typeof(variable) pscr_ret__; \ + TYPEOF_UNQUAL(variable) pscr_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr_ret__ = stem##1(variable); break; \ @@ -332,7 +333,7 @@ static __always_inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_size_call_return2(stem, variable, ...) \ ({ \ - typeof(variable) pscr2_ret__; \ + TYPEOF_UNQUAL(variable) pscr2_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ @@ -374,7 +375,7 @@ do { \ } while (0) /* - * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@linux.com> + * this_cpu operations (C) 2008-2013 Christoph Lameter <cl@gentwo.org> * * Optimized manipulation for memory allocated through the per cpu * allocator or for addresses of per cpu variables. diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index c012df33a9f0..288f5235649a 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -8,6 +8,7 @@ #include <linux/wait.h> #include <linux/rcu_sync.h> #include <linux/lockdep.h> +#include <linux/cleanup.h> struct percpu_rw_semaphore { struct rcu_sync rss; @@ -42,9 +43,10 @@ is_static struct percpu_rw_semaphore name = { \ #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) -extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); +extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool); -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem, + bool freezable) { might_sleep(); @@ -62,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - __percpu_down_read(sem, false); /* Unconditional memory barrier */ + __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from * bleeding the critical section out. @@ -70,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) preempt_enable(); } +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + percpu_down_read_internal(sem, false); +} + +static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem, + bool freeze) +{ + percpu_down_read_internal(sem, freeze); +} + static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { bool ret = true; @@ -81,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ + ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */ preempt_enable(); /* * The barrier() from preempt_enable() prevents the compiler from @@ -125,6 +138,13 @@ extern bool percpu_is_read_locked(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); +DEFINE_GUARD(percpu_read, struct percpu_rw_semaphore *, + percpu_down_read(_T), percpu_up_read(_T)) +DEFINE_GUARD_COND(percpu_read, _try, percpu_down_read_trylock(_T)) + +DEFINE_GUARD(percpu_write, struct percpu_rw_semaphore *, + percpu_down_write(_T), percpu_up_write(_T)) + static inline bool percpu_is_write_locked(struct percpu_rw_semaphore *sem) { return atomic_read(&sem->block); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 52b5ea663b9f..85bf8dd9f087 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -15,11 +15,7 @@ /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#ifdef CONFIG_MEM_ALLOC_PROFILING -#define PERCPU_MODULE_RESERVE (8 << 13) -#else #define PERCPU_MODULE_RESERVE (8 << 10) -#endif #else #define PERCPU_MODULE_RESERVE 0 #endif diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4b5b83677e3f..6dc5e0cd76ca 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -84,7 +84,6 @@ struct arm_pmu { struct pmu pmu; cpumask_t supported_cpus; char *name; - int pmuver; irqreturn_t (*handle_irq)(struct arm_pmu *pmu); void (*enable)(struct perf_event *event); void (*disable)(struct perf_event *event); @@ -100,20 +99,26 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); + /* + * Called by KVM to map the PMUv3 event space onto non-PMUv3 hardware. + */ + int (*map_pmuv3_event)(unsigned int eventsel); DECLARE_BITMAP(cntr_mask, ARMPMU_MAX_HWEVENTS); bool secure_access; /* 32-bit ARM only */ -#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 - DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); -#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 - DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); struct platform_device *plat_device; struct pmu_hw_events __percpu *hw_events; struct hlist_node node; struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; - /* store the PMMIR_EL1 to expose slots */ + + /* PMUv3 only */ + int pmuver; u64 reg_pmmir; +#define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 + DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); +#define ARMV8_PMUV3_EXT_COMMON_EVENT_BASE 0x4000 + DECLARE_BITMAP(pmceid_ext_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS); /* Only to be used by ACPI probing code */ unsigned long acpi_cpuid; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f7c0a3f2f502..ec9d96025683 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -26,18 +26,9 @@ # include <asm/local64.h> #endif -#define PERF_GUEST_ACTIVE 0x01 -#define PERF_GUEST_USER 0x02 - -struct perf_guest_info_callbacks { - unsigned int (*state)(void); - unsigned long (*get_ip)(void); - unsigned int (*handle_intel_pt_intr)(void); -}; - #ifdef CONFIG_HAVE_HW_BREAKPOINT -#include <linux/rhashtable-types.h> -#include <asm/hw_breakpoint.h> +# include <linux/rhashtable-types.h> +# include <asm/hw_breakpoint.h> #endif #include <linux/list.h> @@ -62,19 +53,20 @@ struct perf_guest_info_callbacks { #include <linux/security.h> #include <linux/static_call.h> #include <linux/lockdep.h> + #include <asm/local.h> struct perf_callchain_entry { - __u64 nr; - __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ + u64 nr; + u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_callchain_entry_ctx { - struct perf_callchain_entry *entry; - u32 max_stack; - u32 nr; - short contexts; - bool contexts_maxed; + struct perf_callchain_entry *entry; + u32 max_stack; + u32 nr; + short contexts; + bool contexts_maxed; }; typedef unsigned long (*perf_copy_f)(void *dst, const void *src, @@ -121,8 +113,8 @@ static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { - __u64 nr; - __u64 hw_idx; + u64 nr; + u64 hw_idx; struct perf_branch_entry entries[]; }; @@ -132,10 +124,10 @@ struct task_struct; * extra PMU register associated with an event */ struct hw_perf_event_extra { - u64 config; /* register value */ - unsigned int reg; /* register address or index */ - int alloc; /* extra register already allocated */ - int idx; /* index in shared_regs->regs[] */ + u64 config; /* register value */ + unsigned int reg; /* register address or index */ + int alloc; /* extra register already allocated */ + int idx; /* index in shared_regs->regs[] */ }; /** @@ -144,8 +136,8 @@ struct hw_perf_event_extra { * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. */ -#define PERF_EVENT_FLAG_ARCH 0x000fffff -#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 +#define PERF_EVENT_FLAG_ARCH 0x0fffffff +#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); @@ -157,7 +149,9 @@ struct hw_perf_event { union { struct { /* hardware */ u64 config; + u64 config1; u64 last_tag; + u64 dyn_constraint; unsigned long config_base; unsigned long event_base; int event_base_rdpmc; @@ -225,9 +219,14 @@ struct hw_perf_event { /* * hw_perf_event::state flags; used to track the PERF_EF_* state. */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 + +/* the counter is stopped */ +#define PERF_HES_STOPPED 0x01 + +/* event->count up-to-date */ +#define PERF_HES_UPTODATE 0x02 + +#define PERF_HES_ARCH 0x04 int state; @@ -276,7 +275,7 @@ struct hw_perf_event { */ u64 freq_time_stamp; u64 freq_count_stamp; -#endif +#endif /* CONFIG_PERF_EVENTS */ }; struct perf_event; @@ -285,28 +284,33 @@ struct perf_event_pmu_context; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ -#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ + +/* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_ADD 0x1 + +/* txn to read event group from PMU */ +#define PERF_PMU_TXN_READ 0x2 /** * pmu::capabilities flags */ -#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 -#define PERF_PMU_CAP_NO_NMI 0x0002 -#define PERF_PMU_CAP_AUX_NO_SG 0x0004 -#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 -#define PERF_PMU_CAP_EXCLUSIVE 0x0010 -#define PERF_PMU_CAP_ITRACE 0x0020 -#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 -#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 -#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 -#define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 +#define PERF_PMU_CAP_NO_NMI 0x0002 +#define PERF_PMU_CAP_AUX_NO_SG 0x0004 +#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 +#define PERF_PMU_CAP_EXCLUSIVE 0x0010 +#define PERF_PMU_CAP_ITRACE 0x0020 +#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 +#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 +#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +#define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 /** * pmu::scope */ enum perf_pmu_scope { - PERF_PMU_SCOPE_NONE = 0, + PERF_PMU_SCOPE_NONE = 0, PERF_PMU_SCOPE_CORE, PERF_PMU_SCOPE_DIE, PERF_PMU_SCOPE_CLUSTER, @@ -325,6 +329,9 @@ struct perf_output_handle; struct pmu { struct list_head entry; + spinlock_t events_lock; + struct list_head events; + struct module *module; struct device *dev; struct device *parent; @@ -343,8 +350,7 @@ struct pmu { */ unsigned int scope; - int __percpu *pmu_disable_count; - struct perf_cpu_pmu_context __percpu *cpu_pmu_context; + struct perf_cpu_pmu_context * __percpu *cpu_pmu_context; atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */ int task_ctx_nr; int hrtimer_interval_ms; @@ -388,11 +394,21 @@ struct pmu { * Flags for ->add()/->del()/ ->start()/->stop(). There are * matching hw_perf_event::state flags. */ -#define PERF_EF_START 0x01 /* start the counter when adding */ -#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ -#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ -#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */ -#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */ + +/* start the counter when adding */ +#define PERF_EF_START 0x01 + +/* reload the counter when starting */ +#define PERF_EF_RELOAD 0x02 + +/* update the counter when stopping */ +#define PERF_EF_UPDATE 0x04 + +/* AUX area event, pause tracing */ +#define PERF_EF_PAUSE 0x08 + +/* AUX area event, resume tracing */ +#define PERF_EF_RESUME 0x10 /* * Adds/Removes a counter to/from the PMU, can be done inside a @@ -495,7 +511,7 @@ struct pmu { * context-switches callback */ void (*sched_task) (struct perf_event_pmu_context *pmu_ctx, - bool sched_in); + struct task_struct *task, bool sched_in); /* * Kmem cache of PMU specific data @@ -503,16 +519,6 @@ struct pmu { struct kmem_cache *task_ctx_cache; /* - * PMU specific parts of task perf event context (i.e. ctx->task_ctx_data) - * can be synchronized using this function. See Intel LBR callstack support - * implementation and Perf core context switch handling callbacks for usage - * examples. - */ - void (*swap_task_ctx) (struct perf_event_pmu_context *prev_epc, - struct perf_event_pmu_context *next_epc); - /* optional */ - - /* * Set up pmu-private data structures for an AUX area */ void *(*setup_aux) (struct perf_event *event, void **pages, @@ -601,10 +607,10 @@ enum perf_addr_filter_action_t { * This is a hardware-agnostic filter configuration as specified by the user. */ struct perf_addr_filter { - struct list_head entry; - struct path path; - unsigned long offset; - unsigned long size; + struct list_head entry; + struct path path; + unsigned long offset; + unsigned long size; enum perf_addr_filter_action_t action; }; @@ -619,23 +625,62 @@ struct perf_addr_filter { * bundled together; see perf_event_addr_filters(). */ struct perf_addr_filters_head { - struct list_head list; - raw_spinlock_t lock; - unsigned int nr_file_filters; + struct list_head list; + raw_spinlock_t lock; + unsigned int nr_file_filters; }; struct perf_addr_filter_range { - unsigned long start; - unsigned long size; + unsigned long start; + unsigned long size; }; -/** - * enum perf_event_state - the states of an event: +/* + * The normal states are: + * + * ACTIVE --. + * ^ | + * | | + * sched_{in,out}() | + * | | + * v | + * ,---> INACTIVE --+ <-. + * | | | + * | {dis,en}able() + * sched_in() | | + * | OFF <--' --+ + * | | + * `---> ERROR ------' + * + * That is: + * + * sched_in: INACTIVE -> {ACTIVE,ERROR} + * sched_out: ACTIVE -> INACTIVE + * disable: {ACTIVE,INACTIVE} -> OFF + * enable: {OFF,ERROR} -> INACTIVE + * + * Where {OFF,ERROR} are disabled states. + * + * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of + * defunct events: + * + * - EXIT means task that the even was assigned to died, but child events + * still live, and further children can still be created. But the event + * itself will never be active again. It can only transition to + * {REVOKED,DEAD}; + * + * - REVOKED means the PMU the event was associated with is gone; all + * functionality is stopped but the event is still alive. Can only + * transition to DEAD; + * + * - DEAD event really is DYING tearing down state and freeing bits. + * */ enum perf_event_state { - PERF_EVENT_STATE_DEAD = -4, - PERF_EVENT_STATE_EXIT = -3, - PERF_EVENT_STATE_ERROR = -2, + PERF_EVENT_STATE_DEAD = -5, + PERF_EVENT_STATE_REVOKED = -4, /* pmu gone, must not touch */ + PERF_EVENT_STATE_EXIT = -3, /* task died, still inherit */ + PERF_EVENT_STATE_ERROR = -2, /* scheduling error, can enable */ PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, PERF_EVENT_STATE_ACTIVE = 1, @@ -673,21 +718,24 @@ struct swevent_hlist { struct rcu_head rcu_head; }; -#define PERF_ATTACH_CONTEXT 0x01 -#define PERF_ATTACH_GROUP 0x02 -#define PERF_ATTACH_TASK 0x04 -#define PERF_ATTACH_TASK_DATA 0x08 -#define PERF_ATTACH_ITRACE 0x10 -#define PERF_ATTACH_SCHED_CB 0x20 -#define PERF_ATTACH_CHILD 0x40 +#define PERF_ATTACH_CONTEXT 0x0001 +#define PERF_ATTACH_GROUP 0x0002 +#define PERF_ATTACH_TASK 0x0004 +#define PERF_ATTACH_TASK_DATA 0x0008 +#define PERF_ATTACH_GLOBAL_DATA 0x0010 +#define PERF_ATTACH_SCHED_CB 0x0020 +#define PERF_ATTACH_CHILD 0x0040 +#define PERF_ATTACH_EXCLUSIVE 0x0080 +#define PERF_ATTACH_CALLCHAIN 0x0100 +#define PERF_ATTACH_ITRACE 0x0200 struct bpf_prog; struct perf_cgroup; struct perf_buffer; struct pmu_event_list { - raw_spinlock_t lock; - struct list_head list; + raw_spinlock_t lock; + struct list_head list; }; /* @@ -697,12 +745,12 @@ struct pmu_event_list { * disabled is sufficient since it will hold-off the IPIs. */ #ifdef CONFIG_PROVE_LOCKING -#define lockdep_assert_event_ctx(event) \ +# define lockdep_assert_event_ctx(event) \ WARN_ON_ONCE(__lockdep_enabled && \ (this_cpu_read(hardirqs_enabled) && \ lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD)) #else -#define lockdep_assert_event_ctx(event) +# define lockdep_assert_event_ctx(event) #endif #define for_each_sibling_event(sibling, event) \ @@ -831,7 +879,6 @@ struct perf_event { struct irq_work pending_disable_irq; struct callback_head pending_task; unsigned int pending_work; - struct rcuwait pending_work_wait; atomic_t event_limit; @@ -861,9 +908,9 @@ struct perf_event { #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; struct event_filter *filter; -#ifdef CONFIG_FUNCTION_TRACER +# ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops ftrace_ops; -#endif +# endif #endif #ifdef CONFIG_CGROUP_PERF @@ -874,6 +921,7 @@ struct perf_event { void *security; #endif struct list_head sb_list; + struct list_head pmu_list; /* * Certain events gets forwarded to another pmu internally by over- @@ -881,7 +929,7 @@ struct perf_event { * of it. event->orig_type contains original 'type' requested by * user. */ - __u32 orig_type; + u32 orig_type; #endif /* CONFIG_PERF_EVENTS */ }; @@ -921,7 +969,7 @@ struct perf_event_pmu_context { struct list_head pinned_active; struct list_head flexible_active; - /* Used to avoid freeing per-cpu perf_event_pmu_context */ + /* Used to identify the per-cpu perf_event_pmu_context */ unsigned int embedded : 1; unsigned int nr_events; @@ -931,7 +979,6 @@ struct perf_event_pmu_context { atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; - void *task_ctx_data; /* pmu specific data */ /* * Set when one or more (plausibly active) event can't be scheduled * due to pmu overcommit or pmu constraints, except tolerant to @@ -947,8 +994,8 @@ static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc) } struct perf_event_groups { - struct rb_root tree; - u64 index; + struct rb_root tree; + u64 index; }; @@ -979,7 +1026,6 @@ struct perf_event_context { int nr_user; int is_active; - int nr_task_data; int nr_stat; int nr_freq; int rotate_disable; @@ -1020,6 +1066,41 @@ struct perf_event_context { local_t nr_no_switch_fast; }; +/** + * struct perf_ctx_data - PMU specific data for a task + * @rcu_head: To avoid the race on free PMU specific data + * @refcount: To track users + * @global: To track system-wide users + * @ctx_cache: Kmem cache of PMU specific data + * @data: PMU specific data + * + * Currently, the struct is only used in Intel LBR call stack mode to + * save/restore the call stack of a task on context switches. + * + * The rcu_head is used to prevent the race on free the data. + * The data only be allocated when Intel LBR call stack mode is enabled. + * The data will be freed when the mode is disabled. + * The content of the data will only be accessed in context switch, which + * should be protected by rcu_read_lock(). + * + * Because of the alignment requirement of Intel Arch LBR, the Kmem cache + * is used to allocate the PMU specific data. The ctx_cache is to track + * the Kmem cache. + * + * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct. + * When system-wide Intel LBR call stack mode is enabled, a buffer with + * constant size will be allocated for each task. + * Also, system memory consumption can further grow when the size of + * struct perf_ctx_data enlarges. + */ +struct perf_ctx_data { + struct rcu_head rcu_head; + refcount_t refcount; + int global; + struct kmem_cache *ctx_cache; + void *data; +}; + struct perf_cpu_pmu_context { struct perf_event_pmu_context epc; struct perf_event_pmu_context *task_epc; @@ -1029,6 +1110,7 @@ struct perf_cpu_pmu_context { int active_oncpu; int exclusive; + int pmu_disable_count; raw_spinlock_t hrtimer_lock; struct hrtimer hrtimer; @@ -1062,7 +1144,13 @@ struct perf_output_handle { struct perf_buffer *rb; unsigned long wakeup; unsigned long size; - u64 aux_flags; + union { + u64 flags; /* perf_output*() */ + u64 aux_flags; /* perf_aux_output*() */ + struct { + u64 skip_read : 1; + }; + }; union { void *addr; unsigned long head; @@ -1124,7 +1212,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); -extern void perf_pmu_unregister(struct pmu *pmu); +extern int perf_pmu_unregister(struct pmu *pmu); extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); @@ -1150,16 +1238,18 @@ extern void perf_pmu_resched(struct pmu *pmu); extern int perf_event_refresh(struct perf_event *event, int refresh); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); + extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, - int cpu, - struct task_struct *task, - perf_overflow_handler_t callback, - void *context); + int cpu, + struct task_struct *task, + perf_overflow_handler_t callback, + void *context); + extern void perf_pmu_migrate_context(struct pmu *pmu, - int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value, - u64 *enabled, u64 *running); + int src_cpu, int dst_cpu); +extern int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1279,6 +1369,11 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data, { int size = 1; + if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) + return; + data->callchain = perf_callchain(event, regs); size += data->callchain->nr; @@ -1315,6 +1410,11 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, data->sample_flags |= PERF_SAMPLE_RAW; } +static inline bool has_branch_stack(struct perf_event *event) +{ + return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; +} + static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, struct perf_branch_stack *brs, @@ -1322,8 +1422,16 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, { int size = sizeof(u64); /* nr */ + if (!has_branch_stack(event)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK)) + return; + if (branch_sample_hw_index(event)) size += sizeof(u64); + + brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr); + size += brs->nr * sizeof(struct perf_branch_entry); /* @@ -1358,14 +1466,14 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data, */ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br) { - br->mispred = 0; - br->predicted = 0; - br->in_tx = 0; - br->abort = 0; - br->cycles = 0; - br->type = 0; - br->spec = PERF_BR_SPEC_NA; - br->reserved = 0; + br->mispred = 0; + br->predicted = 0; + br->in_tx = 0; + br->abort = 0; + br->cycles = 0; + br->type = 0; + br->spec = PERF_BR_SPEC_NA; + br->reserved = 0; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1554,7 +1662,17 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); +#define PERF_GUEST_ACTIVE 0x01 +#define PERF_GUEST_USER 0x02 + +struct perf_guest_info_callbacks { + unsigned int (*state)(void); + unsigned long (*get_ip)(void); + unsigned int (*handle_intel_pt_intr)(void); +}; + #ifdef CONFIG_GUEST_PERF_EVENTS + extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); @@ -1565,21 +1683,27 @@ static inline unsigned int perf_guest_state(void) { return static_call(__perf_guest_state)(); } + static inline unsigned long perf_guest_get_ip(void) { return static_call(__perf_guest_get_ip)(); } + static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return static_call(__perf_guest_handle_intel_pt_intr)(); } + extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); -#else + +#else /* !CONFIG_GUEST_PERF_EVENTS: */ + static inline unsigned int perf_guest_state(void) { return 0; } static inline unsigned long perf_guest_get_ip(void) { return 0; } static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } -#endif /* CONFIG_GUEST_PERF_EVENTS */ + +#endif /* !CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); @@ -1609,6 +1733,7 @@ static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx * { if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->contexts; return 0; @@ -1622,6 +1747,7 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 { if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->nr; return 0; @@ -1631,19 +1757,10 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 } extern int sysctl_perf_event_paranoid; -extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; -extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int perf_event_max_stack_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); - /* Access to perf_event_open(2) syscall. */ #define PERF_SECURITY_OPEN 0 @@ -1657,24 +1774,26 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -int perf_allow_kernel(struct perf_event_attr *attr); +extern int perf_allow_kernel(void); -static inline int perf_allow_cpu(struct perf_event_attr *attr) +static inline int perf_allow_cpu(void) { if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) return -EACCES; - return security_perf_event_open(attr, PERF_SECURITY_CPU); + return security_perf_event_open(PERF_SECURITY_CPU); } -static inline int perf_allow_tracepoint(struct perf_event_attr *attr) +static inline int perf_allow_tracepoint(void) { if (sysctl_perf_event_paranoid > -1 && !perfmon_capable()) return -EPERM; - return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); + return security_perf_event_open(PERF_SECURITY_TRACEPOINT); } +extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs); + extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, @@ -1711,11 +1830,6 @@ static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) # define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) #endif -static inline bool has_branch_stack(struct perf_event *event) -{ - return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; -} - static inline bool needs_branch_stack(struct perf_event *event) { return event->attr.branch_sample_type != 0; @@ -1723,7 +1837,7 @@ static inline bool needs_branch_stack(struct perf_event *event) static inline bool has_aux(struct perf_event *event) { - return event->pmu->setup_aux; + return event->pmu && event->pmu->setup_aux; } static inline bool has_aux_action(struct perf_event *event) @@ -1782,7 +1896,7 @@ extern int perf_output_begin_backward(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len); + const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, @@ -1799,7 +1913,9 @@ extern void perf_event_task_tick(void); extern int perf_event_account_interrupt(struct perf_event *event); extern int perf_event_period(struct perf_event *event, u64 value); extern u64 perf_event_pause(struct perf_event *event, bool reset); + #else /* !CONFIG_PERF_EVENTS: */ + static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } @@ -1877,15 +1993,14 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } -static inline int perf_event_period(struct perf_event *event, u64 value) -{ - return -EINVAL; -} -static inline u64 perf_event_pause(struct perf_event *event, bool reset) -{ - return 0; -} -#endif +static inline int +perf_event_period(struct perf_event *event, u64 value) { return -EINVAL; } +static inline u64 +perf_event_pause(struct perf_event *event, bool reset) { return 0; } +static inline int +perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { return 0; } + +#endif /* !CONFIG_PERF_EVENTS */ #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); @@ -1893,31 +2008,31 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif -#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; - const char *event_str; + struct device_attribute attr; + u64 id; + const char *event_str; }; struct perf_pmu_events_ht_attr { - struct device_attribute attr; - u64 id; - const char *event_str_ht; - const char *event_str_noht; + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; }; struct perf_pmu_events_hybrid_attr { - struct device_attribute attr; - u64 id; - const char *event_str; - u64 pmu_type; + struct device_attribute attr; + u64 id; + const char *event_str; + u64 pmu_type; }; struct perf_pmu_format_hybrid_attr { - struct device_attribute attr; - u64 pmu_type; + struct device_attribute attr; + u64 pmu_type; }; ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, @@ -1959,11 +2074,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name) /* Performance counter hotplug functions */ #ifdef CONFIG_PERF_EVENTS -int perf_event_init_cpu(unsigned int cpu); -int perf_event_exit_cpu(unsigned int cpu); +extern int perf_event_init_cpu(unsigned int cpu); +extern int perf_event_exit_cpu(unsigned int cpu); #else -#define perf_event_init_cpu NULL -#define perf_event_exit_cpu NULL +# define perf_event_init_cpu NULL +# define perf_event_exit_cpu NULL #endif extern void arch_perf_update_userpage(struct perf_event *event, diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 3469c4b20105..8a7f4f802c57 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -162,72 +162,37 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code } } -static inline void clear_page_tag_ref(struct page *page) -{ - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - set_codetag_empty(&ref); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } -} +/* Should be called only if mem_alloc_profiling_enabled() */ +void __clear_page_tag_ref(struct page *page); -static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, - unsigned int nr) +static inline void clear_page_tag_ref(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } + if (mem_alloc_profiling_enabled()) + __clear_page_tag_ref(page); } -static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) -{ - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_sub(&ref, PAGE_SIZE * nr); - update_page_tag_ref(handle, &ref); - put_page_tag_ref(handle); - } - } -} - -static inline struct alloc_tag *pgalloc_tag_get(struct page *page) +/* Should be called only if mem_alloc_profiling_enabled() */ +static inline struct alloc_tag *__pgalloc_tag_get(struct page *page) { struct alloc_tag *tag = NULL; - - if (mem_alloc_profiling_enabled()) { - union pgtag_ref_handle handle; - union codetag_ref ref; - - if (get_page_tag_ref(page, &ref, &handle)) { - alloc_tag_sub_check(&ref); - if (ref.ct) - tag = ct_to_alloc_tag(ref.ct); - put_page_tag_ref(handle); - } + union pgtag_ref_handle handle; + union codetag_ref ref; + + if (get_page_tag_ref(page, &ref, &handle)) { + alloc_tag_sub_check(&ref); + if (ref.ct) + tag = ct_to_alloc_tag(ref.ct); + put_page_tag_ref(handle); } return tag; } -static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { - if (mem_alloc_profiling_enabled() && tag) - this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); + if (mem_alloc_profiling_enabled()) + return __pgalloc_tag_get(page); + return NULL; } void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); @@ -238,14 +203,10 @@ void __init alloc_tag_sec_init(void); #else /* CONFIG_MEM_ALLOC_PROFILING */ static inline void clear_page_tag_ref(struct page *page) {} -static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, - unsigned int nr) {} -static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} -static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } -static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} static inline void alloc_tag_sec_init(void) {} static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} +static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } #endif /* CONFIG_MEM_ALLOC_PROFILING */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index adef9d6e9b1b..0b6e1f781d86 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -222,10 +222,14 @@ static inline int pmd_dirty(pmd_t pmd) * hazard could result in the direct mode hypervisor case, since the actual * write to the page tables may not yet have taken place, so reads though * a raw PTE pointer after it has been modified are not guaranteed to be - * up to date. This mode can only be entered and left under the protection of - * the page table locks for all page tables which may be modified. In the UP - * case, this is required so that preemption is disabled, and in the SMP case, - * it must synchronize the delayed page table writes properly on other CPUs. + * up to date. + * + * In the general case, no lock is guaranteed to be held between entry and exit + * of the lazy mode. So the implementation must assume preemption may be enabled + * and cpu migration is possible; it must take steps to be robust against this. + * (In practice, for user PTE updates, the appropriate page table lock(s) are + * held, but for kernel PTE updates, no lock is held). Nesting is not permitted + * and the mode cannot be used in interrupt context. */ #ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE #define arch_enter_lazy_mmu_mode() do {} while (0) @@ -287,7 +291,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, { page_table_check_ptes_set(mm, ptep, pte, nr); - arch_enter_lazy_mmu_mode(); for (;;) { set_pte(ptep, pte); if (--nr == 0) @@ -295,7 +298,6 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, ptep++; pte = pte_next_pfn(pte); } - arch_leave_lazy_mmu_mode(); } #endif #define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1) @@ -533,7 +535,14 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma, static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - ptep_get_and_clear(mm, addr, ptep); + pte_t pte = ptep_get(ptep); + + pte_clear(mm, addr, ptep); + /* + * No need for ptep_get_and_clear(): page table check doesn't care about + * any bits that could have been set by HW concurrently. + */ + page_table_check_pte_clear(mm, pte); } #ifdef CONFIG_GUP_GET_PXX_LOW_HIGH @@ -1155,10 +1164,6 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) } #endif -#ifndef __HAVE_ARCH_PGD_OFFSET_GATE -#define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) -#endif - #ifndef __HAVE_ARCH_MOVE_PTE #define move_pte(pte, old_addr, new_addr) (pte) #endif @@ -1480,64 +1485,92 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) * vmf_insert_pfn. */ -/* - * track_pfn_remap is called when a _new_ pfn mapping is being established - * by remap_pfn_range() for physical range indicated by pfn and size. - */ -static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, - unsigned long size) +static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, + pgprot_t *prot) { return 0; } -/* - * track_pfn_insert is called when a _new_ single pfn is established - * by vmf_insert_pfn(). - */ -static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn) +static inline int pfnmap_track(unsigned long pfn, unsigned long size, + pgprot_t *prot) { + return 0; } -/* - * track_pfn_copy is called when vma that is covering the pfnmap gets - * copied through copy_page_range(). - */ -static inline int track_pfn_copy(struct vm_area_struct *vma) +static inline void pfnmap_untrack(unsigned long pfn, unsigned long size) { - return 0; } +#else +/** + * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * @prot: the pgprot to modify + * + * Lookup the cachemode for the pfn range starting at @pfn with the size + * @size and store it in @prot, leaving other data in @prot unchanged. + * + * This allows for a hardware implementation to have fine-grained control of + * memory cache behavior at page level granularity. Without a hardware + * implementation, this function does nothing. + * + * Currently there is only one implementation for this - x86 Page Attribute + * Table (PAT). See Documentation/arch/x86/pat.rst for more details. + * + * This function can fail if the pfn range spans pfns that require differing + * cachemodes. If the pfn range was previously verified to have a single + * cachemode, it is sufficient to query only a single pfn. The assumption is + * that this is the case for drivers using the vmf_insert_pfn*() interface. + * + * Returns 0 on success and -EINVAL on error. + */ +int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, + pgprot_t *prot); -/* - * untrack_pfn is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case pfn, size are zero). +/** + * pfnmap_track - track a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * @prot: the pgprot to track + * + * Requested the pfn range to be 'tracked' by a hardware implementation and + * setup the cachemode in @prot similar to pfnmap_setup_cachemode(). + * + * This allows for fine-grained control of memory cache behaviour at page + * level granularity. Tracking memory this way is persisted across VMA splits + * (VMA merging does not apply for VM_PFNMAP). + * + * Currently, there is only one implementation for this - x86 Page Attribute + * Table (PAT). See Documentation/arch/x86/pat.rst for more details. + * + * Returns 0 on success and -EINVAL on error. */ -static inline void untrack_pfn(struct vm_area_struct *vma, - unsigned long pfn, unsigned long size, - bool mm_wr_locked) -{ -} +int pfnmap_track(unsigned long pfn, unsigned long size, pgprot_t *prot); -/* - * untrack_pfn_clear is called while mremapping a pfnmap for a new region - * or fails to copy pgtable during duplicate vm area. +/** + * pfnmap_untrack - untrack a pfn range + * @pfn: the start of the pfn range + * @size: the size of the pfn range in bytes + * + * Untrack a pfn range previously tracked through pfnmap_track(). + */ +void pfnmap_untrack(unsigned long pfn, unsigned long size); +#endif + +/** + * pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn + * @pfn: the pfn + * @prot: the pgprot to modify + * + * Lookup the cachemode for @pfn and store it in @prot, leaving other + * data in @prot unchanged. + * + * See pfnmap_setup_cachemode() for details. */ -static inline void untrack_pfn_clear(struct vm_area_struct *vma) +static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot) { + pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot); } -#else -extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long addr, - unsigned long size); -extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, - pfn_t pfn); -extern int track_pfn_copy(struct vm_area_struct *vma); -extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size, bool mm_wr_locked); -extern void untrack_pfn_clear(struct vm_area_struct *vma); -#endif #ifdef CONFIG_MMU #ifdef __HAVE_COLOR_ZERO_PAGE diff --git a/include/linux/phy.h b/include/linux/phy.h index 563c46205685..e194dad1623d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -32,28 +32,12 @@ #include <linux/atomic.h> #include <net/eee.h> -#define PHY_DEFAULT_FEATURES (SUPPORTED_Autoneg | \ - SUPPORTED_TP | \ - SUPPORTED_MII) - -#define PHY_10BT_FEATURES (SUPPORTED_10baseT_Half | \ - SUPPORTED_10baseT_Full) - -#define PHY_100BT_FEATURES (SUPPORTED_100baseT_Half | \ - SUPPORTED_100baseT_Full) - -#define PHY_1000BT_FEATURES (SUPPORTED_1000baseT_Half | \ - SUPPORTED_1000baseT_Full) - extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1s_p2mp_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init; -extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init; -extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_fec_features) __ro_after_init; -extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap1_features) __ro_after_init; extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; @@ -62,21 +46,11 @@ extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_eee_cap2_features) __ro_after_init; #define PHY_BASIC_T1S_P2MP_FEATURES ((unsigned long *)&phy_basic_t1s_p2mp_features) #define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features) #define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features) -#define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features) #define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features) -#define PHY_10GBIT_FEC_FEATURES ((unsigned long *)&phy_10gbit_fec_features) -#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features) #define PHY_EEE_CAP1_FEATURES ((unsigned long *)&phy_eee_cap1_features) #define PHY_EEE_CAP2_FEATURES ((unsigned long *)&phy_eee_cap2_features) extern const int phy_basic_ports_array[3]; -extern const int phy_fibre_port_array[1]; -extern const int phy_all_ports_features_array[7]; -extern const int phy_10_100_features_array[4]; -extern const int phy_basic_t1_features_array[3]; -extern const int phy_basic_t1s_p2mp_features_array[2]; -extern const int phy_gbit_features_array[2]; -extern const int phy_10gbit_features_array[1]; /* * Set phydev->irq to PHY_POLL if interrupts are not supported, @@ -107,7 +81,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface * @PHY_INTERFACE_MODE_RGMII_ID: RGMII with Internal RX+TX delay * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay - * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal RX delay + * @PHY_INTERFACE_MODE_RGMII_TXID: RGMII with Internal TX delay * @PHY_INTERFACE_MODE_RTBI: Reduced TBI * @PHY_INTERFACE_MODE_SMII: Serial MII * @PHY_INTERFACE_MODE_XGMII: 10 gigabit media-independent interface @@ -207,13 +181,6 @@ static inline void phy_interface_set_rgmii(unsigned long *intf) __set_bit(PHY_INTERFACE_MODE_RGMII_TXID, intf); } -/* - * phy_supported_speeds - return all speeds currently supported by a PHY device - */ -unsigned int phy_supported_speeds(struct phy_device *phy, - unsigned int *speeds, - unsigned int size); - /** * phy_modes - map phy_interface_t enum to device tree binding of phy-mode * @interface: enum phy_interface_t value @@ -298,13 +265,34 @@ static inline const char *phy_modes(phy_interface_t interface) } } -#define PHY_INIT_TIMEOUT 100000 -#define PHY_FORCE_TIMEOUT 10 +/** + * rgmii_clock - map link speed to the clock rate + * @speed: link speed value + * + * Description: maps RGMII supported link speeds + * into the clock rates. + * + * Returns: clock rate or negative errno + */ +static inline long rgmii_clock(int speed) +{ + switch (speed) { + case SPEED_10: + return 2500000; + case SPEED_100: + return 25000000; + case SPEED_1000: + return 125000000; + default: + return -EINVAL; + } +} #define PHY_MAX_ADDR 32 /* Used when trying to connect to a specific phy (mii bus id:phy device id) */ #define PHY_ID_FMT "%s:%02x" +#define PHY_ID_SIZE (MII_BUS_ID_SIZE + 3) #define MII_BUS_ID_SIZE 61 @@ -333,41 +321,6 @@ struct mdio_bus_stats { }; /** - * struct phy_package_shared - Shared information in PHY packages - * @base_addr: Base PHY address of PHY package used to combine PHYs - * in one package and for offset calculation of phy_package_read/write - * @np: Pointer to the Device Node if PHY package defined in DT - * @refcnt: Number of PHYs connected to this shared data - * @flags: Initialization of PHY package - * @priv_size: Size of the shared private data @priv - * @priv: Driver private data shared across a PHY package - * - * Represents a shared structure between different phydev's in the same - * package, for example a quad PHY. See phy_package_join() and - * phy_package_leave(). - */ -struct phy_package_shared { - u8 base_addr; - /* With PHY package defined in DT this points to the PHY package node */ - struct device_node *np; - refcount_t refcnt; - unsigned long flags; - size_t priv_size; - - /* private data pointer */ - /* note that this pointer is shared between different phydevs and - * the user has to take care of appropriate locking. It is allocated - * and freed automatically by phy_package_join() and - * phy_package_leave(). - */ - void *priv; -}; - -/* used as bit number in atomic bitops */ -#define PHY_SHARED_F_INIT_DONE 0 -#define PHY_SHARED_F_PROBE_DONE 1 - -/** * struct mii_bus - Represents an MDIO bus * * @owner: Who owns this device @@ -606,7 +559,7 @@ struct macsec_ops; * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host - * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited + * @eee_disabled_modes: Energy efficient ethernet modes not to be advertised * @autoneg: Flag autoneg being used * @rate_matching: Current rate matching mode * @link: Current link state @@ -722,7 +675,7 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); /* Energy efficient ethernet modes which should be prohibited */ - __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); + __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_disabled_modes); bool enable_tx_lpi; bool eee_active; struct eee_config eee_cfg; @@ -791,10 +744,7 @@ struct phy_device { #define PHY_F_NO_IRQ 0x80000000 #define PHY_F_RXC_ALWAYS_ON 0x40000000 -static inline struct phy_device *to_phy_device(const struct device *dev) -{ - return container_of(to_mdio_device(dev), struct phy_device, mdio); -} +#define to_phy_device(__dev) container_of_const(to_mdio_device(__dev), struct phy_device, mdio) /** * struct phy_tdr_config - Configuration of a TDR raw test @@ -818,6 +768,24 @@ struct phy_tdr_config { #define PHY_PAIR_ALL -1 /** + * enum link_inband_signalling - in-band signalling modes that are supported + * + * @LINK_INBAND_DISABLE: in-band signalling can be disabled + * @LINK_INBAND_ENABLE: in-band signalling can be enabled without bypass + * @LINK_INBAND_BYPASS: in-band signalling can be enabled with bypass + * + * The possible and required bits can only be used if the valid bit is set. + * If possible is clear, that means inband signalling can not be used. + * Required is only valid when possible is set, and means that inband + * signalling must be used. + */ +enum link_inband_signalling { + LINK_INBAND_DISABLE = BIT(0), + LINK_INBAND_ENABLE = BIT(1), + LINK_INBAND_BYPASS = BIT(2), +}; + +/** * struct phy_plca_cfg - Configuration of the PLCA (Physical Layer Collision * Avoidance) Reconciliation Sublayer. * @@ -957,6 +925,19 @@ struct phy_driver { int (*get_features)(struct phy_device *phydev); /** + * @inband_caps: query whether in-band is supported for the given PHY + * interface mode. Returns a bitmask of bits defined by enum + * link_inband_signalling. + */ + unsigned int (*inband_caps)(struct phy_device *phydev, + phy_interface_t interface); + + /** + * @config_inband: configure in-band mode for the PHY + */ + int (*config_inband)(struct phy_device *phydev, unsigned int modes); + + /** * @get_rate_matching: Get the supported type of rate matching for a * particular phy interface. This is used by phy consumers to determine * whether to advertise lower-speed modes for that interface. It is @@ -1006,7 +987,8 @@ struct phy_driver { * driver for the given phydev. If NULL, matching is based on * phy_id and phy_id_mask. */ - int (*match_phy_device)(struct phy_device *phydev); + int (*match_phy_device)(struct phy_device *phydev, + const struct phy_driver *phydrv); /** * @set_wol: Some devices (e.g. qnap TS-119P II) require PHY @@ -1090,6 +1072,53 @@ struct phy_driver { int (*cable_test_get_status)(struct phy_device *dev, bool *finished); /* Get statistics from the PHY using ethtool */ + /** + * @get_phy_stats: Retrieve PHY statistics. + * @dev: The PHY device for which the statistics are retrieved. + * @eth_stats: structure where Ethernet PHY stats will be stored. + * @stats: structure where additional PHY-specific stats will be stored. + * + * Retrieves the supported PHY statistics and populates the provided + * structures. The input structures are pre-initialized with + * `ETHTOOL_STAT_NOT_SET`, and the driver must only modify members + * corresponding to supported statistics. Unmodified members will remain + * set to `ETHTOOL_STAT_NOT_SET` and will not be returned to userspace. + */ + void (*get_phy_stats)(struct phy_device *dev, + struct ethtool_eth_phy_stats *eth_stats, + struct ethtool_phy_stats *stats); + + /** + * @get_link_stats: Retrieve link statistics. + * @dev: The PHY device for which the statistics are retrieved. + * @link_stats: structure where link-specific stats will be stored. + * + * Retrieves link-related statistics for the given PHY device. The input + * structure is pre-initialized with `ETHTOOL_STAT_NOT_SET`, and the + * driver must only modify members corresponding to supported + * statistics. Unmodified members will remain set to + * `ETHTOOL_STAT_NOT_SET` and will not be returned to userspace. + */ + void (*get_link_stats)(struct phy_device *dev, + struct ethtool_link_ext_stats *link_stats); + + /** + * @update_stats: Trigger periodic statistics updates. + * @dev: The PHY device for which statistics updates are triggered. + * + * Periodically gathers statistics from the PHY device to update locally + * maintained 64-bit counters. This is necessary for PHYs that implement + * reduced-width counters (e.g., 16-bit or 32-bit) which can overflow + * more frequently compared to 64-bit counters. By invoking this + * callback, drivers can fetch the current counter values, handle + * overflow detection, and accumulate the results into local 64-bit + * counters for accurate reporting through the `get_phy_stats` and + * `get_link_stats` interfaces. + * + * Return: 0 on success or a negative error code on failure. + */ + int (*update_stats)(struct phy_device *dev); + /** @get_sset_count: Number of statistic counters */ int (*get_sset_count)(struct phy_device *dev); /** @get_strings: Names of the statistic counters */ @@ -1106,8 +1135,16 @@ struct phy_driver { int (*set_tunable)(struct phy_device *dev, struct ethtool_tunable *tuna, const void *data); - /** @set_loopback: Set the loopback mood of the PHY */ - int (*set_loopback)(struct phy_device *dev, bool enable); + /** + * @set_loopback: Set the loopback mode of the PHY + * enable selects if the loopback mode is enabled or disabled. If the + * loopback mode is enabled, then the speed of the loopback mode can be + * requested with the speed argument. If the speed argument is zero, + * then any speed can be selected. If the speed argument is > 0, then + * this speed shall be selected for the loopback mode or EOPNOTSUPP + * shall be returned if speed selection is not supported. + */ + int (*set_loopback)(struct phy_device *dev, bool enable, int speed); /** @get_sqi: Get the signal quality indication */ int (*get_sqi)(struct phy_device *dev); /** @get_sqi_max: Get the maximum signal quality indication */ @@ -1190,13 +1227,23 @@ struct phy_driver { */ int (*led_polarity_set)(struct phy_device *dev, int index, unsigned long modes); + + /** + * @get_next_update_time: Get the time until the next update event + * @dev: PHY device + * + * Callback to determine the time (in jiffies) until the next + * update event for the PHY state machine. Allows PHY drivers to + * dynamically adjust polling intervals based on link state or other + * conditions. + * + * Returns the time in jiffies until the next update event. + */ + unsigned int (*get_next_update_time)(struct phy_device *dev); }; #define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) -#define PHY_ANY_ID "MATCH ANY PHY" -#define PHY_ANY_UID 0xffffffff - #define PHY_ID_MATCH_EXACT(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 0) #define PHY_ID_MATCH_MODEL(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 4) #define PHY_ID_MATCH_VENDOR(id) .phy_id = (id), .phy_id_mask = GENMASK(31, 10) @@ -1229,62 +1276,36 @@ static inline bool phydev_id_compare(struct phy_device *phydev, u32 id) return phy_id_compare(id, phydev->phy_id, phydev->drv->phy_id_mask); } -/* A Structure for boards to register fixups with the PHY Lib */ -struct phy_fixup { - struct list_head list; - char bus_id[MII_BUS_ID_SIZE + 3]; - u32 phy_uid; - u32 phy_uid_mask; - int (*run)(struct phy_device *phydev); -}; - const char *phy_speed_to_str(int speed); const char *phy_duplex_to_str(unsigned int duplex); const char *phy_rate_matching_to_str(int rate_matching); int phy_interface_num_ports(phy_interface_t interface); -/* A structure for mapping a particular speed and duplex - * combination to a particular SUPPORTED and ADVERTISED value - */ -struct phy_setting { - u32 speed; - u8 duplex; - u8 bit; -}; - -const struct phy_setting * -phy_lookup_setting(int speed, int duplex, const unsigned long *mask, - bool exact); -size_t phy_speeds(unsigned int *speeds, size_t size, - unsigned long *mask); -void of_set_phy_supported(struct phy_device *phydev); -void of_set_phy_eee_broken(struct phy_device *phydev); -void of_set_phy_timing_role(struct phy_device *phydev); -int phy_speed_down_core(struct phy_device *phydev); - /** - * phy_set_eee_broken - Mark an EEE mode as broken so that it isn't advertised. + * phy_is_started - Convenience function to check whether PHY is started * @phydev: The phy_device struct - * @link_mode: The broken EEE mode */ -static inline void phy_set_eee_broken(struct phy_device *phydev, u32 link_mode) +static inline bool phy_is_started(struct phy_device *phydev) { - linkmode_set_bit(link_mode, phydev->eee_broken_modes); + return phydev->state >= PHY_UP; } /** - * phy_is_started - Convenience function to check whether PHY is started + * phy_disable_eee_mode - Don't advertise an EEE mode. * @phydev: The phy_device struct + * @link_mode: The EEE mode to be disabled */ -static inline bool phy_is_started(struct phy_device *phydev) +static inline void phy_disable_eee_mode(struct phy_device *phydev, u32 link_mode) { - return phydev->state >= PHY_UP; + WARN_ON(phy_is_started(phydev)); + + linkmode_set_bit(link_mode, phydev->eee_disabled_modes); + linkmode_clear_bit(link_mode, phydev->advertising_eee); } void phy_resolve_aneg_pause(struct phy_device *phydev); void phy_resolve_aneg_linkmode(struct phy_device *phydev); -void phy_check_downshift(struct phy_device *phydev); /** * phy_read - Convenience function for reading a given PHY register @@ -1580,6 +1601,9 @@ static inline bool phy_polling_mode(struct phy_device *phydev) if (phydev->drv->flags & PHY_POLL_CABLE_TEST) return true; + if (phydev->drv->update_stats) + return true; + return phydev->irq == PHY_POLL; } @@ -1661,15 +1685,6 @@ static inline bool phy_is_default_hwtstamp(struct phy_device *phydev) } /** - * phy_is_internal - Convenience function for testing if a PHY is internal - * @phydev: the phy_device struct - */ -static inline bool phy_is_internal(struct phy_device *phydev) -{ - return phydev->is_internal; -} - -/** * phy_on_sfp - Convenience function for testing if a PHY is on an SFP module * @phydev: the phy_device struct */ @@ -1736,63 +1751,20 @@ int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum, struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -#if IS_ENABLED(CONFIG_PHYLIB) int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); -struct phy_device *device_phy_find_device(struct device *dev); struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); void phy_device_free(struct phy_device *phydev); -#else -static inline int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id) -{ - return 0; -} -static inline -struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode) -{ - return 0; -} - -static inline -struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode) -{ - return NULL; -} - -static inline struct phy_device *device_phy_find_device(struct device *dev) -{ - return NULL; -} - -static inline -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode) -{ - return NULL; -} - -static inline -struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) -{ - return NULL; -} - -static inline int phy_device_register(struct phy_device *phy) -{ - return 0; -} - -static inline void phy_device_free(struct phy_device *phydev) { } -#endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); int phy_get_c45_ids(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); -int phy_loopback(struct phy_device *phydev, bool enable); +int phy_loopback(struct phy_device *phydev, bool enable, int speed); int phy_sfp_connect_phy(void *upstream, struct phy_device *phy); void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy); void phy_sfp_attach(void *upstream, struct sfp_bus *bus); @@ -1818,6 +1790,9 @@ int phy_config_aneg(struct phy_device *phydev); int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); +unsigned int phy_inband_caps(struct phy_device *phydev, + phy_interface_t interface); +int phy_config_inband(struct phy_device *phydev, unsigned int modes); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); bool phy_check_valid(int speed, int duplex, unsigned long *features); @@ -1890,6 +1865,9 @@ char *phy_attached_info_irq(struct phy_device *phydev) __malloc; void phy_attached_info(struct phy_device *phydev); +int genphy_match_phy_device(struct phy_device *phydev, + const struct phy_driver *phydrv); + /* Clause 22 PHY */ int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); @@ -1904,7 +1882,7 @@ int genphy_read_status(struct phy_device *phydev); int genphy_read_master_slave(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); -int genphy_loopback(struct phy_device *phydev, bool enable); +int genphy_loopback(struct phy_device *phydev, bool enable, int speed); int genphy_soft_reset(struct phy_device *phydev); irqreturn_t genphy_handle_interrupt_no_ack(struct phy_device *phydev); @@ -1946,7 +1924,7 @@ int genphy_c45_pma_baset1_read_master_slave(struct phy_device *phydev); int genphy_c45_read_status(struct phy_device *phydev); int genphy_c45_baset1_read_status(struct phy_device *phydev); int genphy_c45_config_aneg(struct phy_device *phydev); -int genphy_c45_loopback(struct phy_device *phydev, bool enable); +int genphy_c45_loopback(struct phy_device *phydev, bool enable, int speed); int genphy_c45_pma_resume(struct phy_device *phydev); int genphy_c45_pma_suspend(struct phy_device *phydev); int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable); @@ -1956,14 +1934,12 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev, const struct phy_plca_cfg *plca_cfg); int genphy_c45_plca_get_status(struct phy_device *phydev, struct phy_plca_status *plca_st); -int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp, bool *is_enabled); +int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *lp); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); -int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; @@ -1989,7 +1965,6 @@ int phy_drivers_register(struct phy_driver *new_driver, int n, struct module *owner); void phy_error(struct phy_device *phydev); void phy_state_machine(struct work_struct *work); -void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies); void phy_trigger_machine(struct phy_device *phydev); void phy_mac_interrupt(struct phy_device *phydev); void phy_start_machine(struct phy_device *phydev); @@ -2014,6 +1989,7 @@ void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); void phy_support_eee(struct phy_device *phydev); +void phy_disable_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); @@ -2024,11 +2000,16 @@ void phy_get_pause(struct phy_device *phydev, bool *tx_pause, bool *rx_pause); s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, const int *delay_values, int size, bool is_rx); +int phy_get_tx_amplitude_gain(struct phy_device *phydev, struct device *dev, + enum ethtool_link_mode_bit_indices linkmode, + u32 *val); + +int phy_get_mac_termination(struct phy_device *phydev, struct device *dev, + u32 *val); + void phy_resolve_pause(unsigned long *local_adv, unsigned long *partner_adv, bool *tx_pause, bool *rx_pause); -int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, - int (*run)(struct phy_device *)); int phy_register_fixup_for_id(const char *bus_id, int (*run)(struct phy_device *)); int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, @@ -2038,6 +2019,8 @@ int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask); int phy_unregister_fixup_for_id(const char *bus_id); int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask); +int phy_eee_tx_clock_stop_capable(struct phy_device *phydev); +int phy_eee_rx_clock_stop(struct phy_device *phydev, bool clk_stop_enable); int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable); int phy_get_eee_err(struct phy_device *phydev); int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); @@ -2050,21 +2033,18 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev, int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); -int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size); -int of_phy_package_join(struct phy_device *phydev, size_t priv_size); -void phy_package_leave(struct phy_device *phydev); -int devm_phy_package_join(struct device *dev, struct phy_device *phydev, - int base_addr, size_t priv_size); -int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, - size_t priv_size); - -int __init mdio_bus_init(void); -void mdio_bus_exit(void); int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data); int phy_ethtool_get_sset_count(struct phy_device *phydev); int phy_ethtool_get_stats(struct phy_device *phydev, struct ethtool_stats *stats, u64 *data); + +void __phy_ethtool_get_phy_stats(struct phy_device *phydev, + struct ethtool_eth_phy_stats *phy_stats, + struct ethtool_phy_stats *phydev_stats); +void __phy_ethtool_get_link_ext_stats(struct phy_device *phydev, + struct ethtool_link_ext_stats *link_stats); + int phy_ethtool_get_plca_cfg(struct phy_device *phydev, struct phy_plca_cfg *plca_cfg); int phy_ethtool_set_plca_cfg(struct phy_device *phydev, @@ -2079,105 +2059,8 @@ int __phy_hwtstamp_set(struct phy_device *phydev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); -static inline int phy_package_address(struct phy_device *phydev, - unsigned int addr_offset) -{ - struct phy_package_shared *shared = phydev->shared; - u8 base_addr = shared->base_addr; - - if (addr_offset >= PHY_MAX_ADDR - base_addr) - return -EIO; - - /* we know that addr will be in the range 0..31 and thus the - * implicit cast to a signed int is not a problem. - */ - return base_addr + addr_offset; -} - -static inline int phy_package_read(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return mdiobus_read(phydev->mdio.bus, addr, regnum); -} - -static inline int __phy_package_read(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return __mdiobus_read(phydev->mdio.bus, addr, regnum); -} - -static inline int phy_package_write(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum, - u16 val) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return mdiobus_write(phydev->mdio.bus, addr, regnum, val); -} - -static inline int __phy_package_write(struct phy_device *phydev, - unsigned int addr_offset, u32 regnum, - u16 val) -{ - int addr = phy_package_address(phydev, addr_offset); - - if (addr < 0) - return addr; - - return __mdiobus_write(phydev->mdio.bus, addr, regnum, val); -} - -int __phy_package_read_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum); - -int phy_package_read_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum); - -int __phy_package_write_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum, u16 val); - -int phy_package_write_mmd(struct phy_device *phydev, - unsigned int addr_offset, int devad, - u32 regnum, u16 val); - -static inline bool __phy_package_set_once(struct phy_device *phydev, - unsigned int b) -{ - struct phy_package_shared *shared = phydev->shared; - - if (!shared) - return false; - - return !test_and_set_bit(b, &shared->flags); -} - -static inline bool phy_package_init_once(struct phy_device *phydev) -{ - return __phy_package_set_once(phydev, PHY_SHARED_F_INIT_DONE); -} - -static inline bool phy_package_probe_once(struct phy_device *phydev) -{ - return __phy_package_set_once(phydev, PHY_SHARED_F_PROBE_DONE); -} - extern const struct bus_type mdio_bus_type; +extern const struct class mdio_bus_class; struct mdio_board_info { const char *bus_id; @@ -2186,17 +2069,8 @@ struct mdio_board_info { const void *platform_data; }; -#if IS_ENABLED(CONFIG_MDIO_DEVICE) int mdiobus_register_board_info(const struct mdio_board_info *info, unsigned int n); -#else -static inline int mdiobus_register_board_info(const struct mdio_board_info *i, - unsigned int n) -{ - return 0; -} -#endif - /** * phy_module_driver() - Helper macro for registering PHY drivers diff --git a/include/linux/phy/phy-hdmi.h b/include/linux/phy/phy-hdmi.h new file mode 100644 index 000000000000..f0ec963c6e84 --- /dev/null +++ b/include/linux/phy/phy-hdmi.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2022,2024 NXP + */ + +#ifndef __PHY_HDMI_H_ +#define __PHY_HDMI_H_ + +/** + * struct phy_configure_opts_hdmi - HDMI configuration set + * @tmds_char_rate: HDMI TMDS Character Rate in Hertz. + * @bpc: Bits per color channel. + * + * This structure is used to represent the configuration state of a HDMI phy. + */ +struct phy_configure_opts_hdmi { + unsigned long long tmds_char_rate; + unsigned int bpc; +}; + +#endif /* __PHY_HDMI_H_ */ diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 03cd5bae92d3..13add0c2c407 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -17,6 +17,7 @@ #include <linux/regulator/consumer.h> #include <linux/phy/phy-dp.h> +#include <linux/phy/phy-hdmi.h> #include <linux/phy/phy-lvds.h> #include <linux/phy/phy-mipi-dphy.h> @@ -42,7 +43,8 @@ enum phy_mode { PHY_MODE_MIPI_DPHY, PHY_MODE_SATA, PHY_MODE_LVDS, - PHY_MODE_DP + PHY_MODE_DP, + PHY_MODE_HDMI, }; enum phy_media { @@ -60,11 +62,14 @@ enum phy_media { * the DisplayPort protocol. * @lvds: Configuration set applicable for phys supporting * the LVDS phy mode. + * @hdmi: Configuration set applicable for phys supporting + * the HDMI phy mode. */ union phy_configure_opts { struct phy_configure_opts_mipi_dphy mipi_dphy; struct phy_configure_opts_dp dp; struct phy_configure_opts_lvds lvds; + struct phy_configure_opts_hdmi hdmi; }; /** @@ -149,6 +154,7 @@ struct phy_attrs { * @id: id of the phy device * @ops: function pointers for performing phy operations * @mutex: mutex to protect phy_ops + * @lockdep_key: lockdep information for this mutex * @init_count: used to protect when the PHY is used by multiple consumers * @power_count: used to protect when the PHY is used by multiple consumers * @attrs: used to specify PHY specific attributes @@ -160,6 +166,7 @@ struct phy { int id; const struct phy_ops *ops; struct mutex mutex; + struct lock_class_key lockdep_key; int init_count; int power_count; struct phy_attrs attrs; @@ -227,8 +234,6 @@ int phy_pm_runtime_get(struct phy *phy); int phy_pm_runtime_get_sync(struct phy *phy); int phy_pm_runtime_put(struct phy *phy); int phy_pm_runtime_put_sync(struct phy *phy); -void phy_pm_runtime_allow(struct phy *phy); -void phy_pm_runtime_forbid(struct phy *phy); int phy_init(struct phy *phy); int phy_exit(struct phy *phy); int phy_power_on(struct phy *phy); @@ -321,16 +326,6 @@ static inline int phy_pm_runtime_put_sync(struct phy *phy) return -ENOSYS; } -static inline void phy_pm_runtime_allow(struct phy *phy) -{ - return; -} - -static inline void phy_pm_runtime_forbid(struct phy *phy) -{ - return; -} - static inline int phy_init(struct phy *phy) { if (!phy) diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index 1acafd86ab13..5399b9e41e35 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -13,43 +13,27 @@ struct fixed_phy_status { }; struct device_node; -struct gpio_desc; struct net_device; #if IS_ENABLED(CONFIG_FIXED_PHY) extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier); -extern int fixed_phy_add(unsigned int irq, int phy_id, - struct fixed_phy_status *status); -extern struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np); - -extern struct phy_device * -fixed_phy_register_with_gpiod(unsigned int irq, - struct fixed_phy_status *status, - struct gpio_desc *gpiod); +int fixed_phy_add(int phy_id, const struct fixed_phy_status *status); +struct phy_device *fixed_phy_register(const struct fixed_phy_status *status, + struct device_node *np); extern void fixed_phy_unregister(struct phy_device *phydev); extern int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, struct fixed_phy_status *)); #else -static inline int fixed_phy_add(unsigned int irq, int phy_id, - struct fixed_phy_status *status) +static inline int fixed_phy_add(int phy_id, + const struct fixed_phy_status *status) { return -ENODEV; } -static inline struct phy_device *fixed_phy_register(unsigned int irq, - struct fixed_phy_status *status, - struct device_node *np) -{ - return ERR_PTR(-ENODEV); -} - static inline struct phy_device * -fixed_phy_register_with_gpiod(unsigned int irq, - struct fixed_phy_status *status, - struct gpio_desc *gpiod) +fixed_phy_register(const struct fixed_phy_status *status, + struct device_node *np) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/phylib_stubs.h b/include/linux/phylib_stubs.h index 1279f48c8a70..9d2d6090c86d 100644 --- a/include/linux/phylib_stubs.h +++ b/include/linux/phylib_stubs.h @@ -5,6 +5,9 @@ #include <linux/rtnetlink.h> +struct ethtool_eth_phy_stats; +struct ethtool_link_ext_stats; +struct ethtool_phy_stats; struct kernel_hwtstamp_config; struct netlink_ext_ack; struct phy_device; @@ -19,6 +22,11 @@ struct phylib_stubs { int (*hwtstamp_set)(struct phy_device *phydev, struct kernel_hwtstamp_config *config, struct netlink_ext_ack *extack); + void (*get_phy_stats)(struct phy_device *phydev, + struct ethtool_eth_phy_stats *phy_stats, + struct ethtool_phy_stats *phydev_stats); + void (*get_link_ext_stats)(struct phy_device *phydev, + struct ethtool_link_ext_stats *link_stats); }; static inline int phy_hwtstamp_get(struct phy_device *phydev, @@ -50,6 +58,29 @@ static inline int phy_hwtstamp_set(struct phy_device *phydev, return phylib_stubs->hwtstamp_set(phydev, config, extack); } +static inline void phy_ethtool_get_phy_stats(struct phy_device *phydev, + struct ethtool_eth_phy_stats *phy_stats, + struct ethtool_phy_stats *phydev_stats) +{ + ASSERT_RTNL(); + + if (!phylib_stubs) + return; + + phylib_stubs->get_phy_stats(phydev, phy_stats, phydev_stats); +} + +static inline void phy_ethtool_get_link_ext_stats(struct phy_device *phydev, + struct ethtool_link_ext_stats *link_stats) +{ + ASSERT_RTNL(); + + if (!phylib_stubs) + return; + + phylib_stubs->get_link_ext_stats(phydev, link_stats); +} + #else static inline int phy_hwtstamp_get(struct phy_device *phydev, @@ -65,4 +96,15 @@ static inline int phy_hwtstamp_set(struct phy_device *phydev, return -EOPNOTSUPP; } +static inline void phy_ethtool_get_phy_stats(struct phy_device *phydev, + struct ethtool_eth_phy_stats *phy_stats, + struct ethtool_phy_stats *phydev_stats) +{ +} + +static inline void phy_ethtool_get_link_ext_stats(struct phy_device *phydev, + struct ethtool_link_ext_stats *link_stats) +{ +} + #endif diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 5c01048860c4..30659b615fca 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -5,6 +5,8 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <net/eee.h> + struct device_node; struct ethtool_cmd; struct fwnode_handle; @@ -143,11 +145,17 @@ enum phylink_op_type { * possible and avoid stopping it during suspend events. * @default_an_inband: if true, defaults to MLO_AN_INBAND rather than * MLO_AN_PHY. A fixed-link specification will override. + * @eee_rx_clk_stop_enable: if true, PHY can stop the receive clock during LPI * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx * are supported by the MAC/PCS. + * @lpi_interfaces: bitmap describing which PHY interface modes can support + * LPI signalling. * @mac_capabilities: MAC pause/speed/duplex capabilities. + * @lpi_capabilities: MAC speeds which can support LPI signalling + * @lpi_timer_default: Default EEE LPI timer setting. + * @eee_enabled_default: If set, EEE will be enabled by phylink at creation time */ struct phylink_config { struct device *dev; @@ -156,10 +164,15 @@ struct phylink_config { bool mac_managed_pm; bool mac_requires_rxc; bool default_an_inband; + bool eee_rx_clk_stop_enable; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); DECLARE_PHY_INTERFACE_MASK(supported_interfaces); + DECLARE_PHY_INTERFACE_MASK(lpi_interfaces); unsigned long mac_capabilities; + unsigned long lpi_capabilities; + u32 lpi_timer_default; + bool eee_enabled_default; }; void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); @@ -173,6 +186,8 @@ void phylink_limit_mac_speed(struct phylink_config *config, u32 max_speed); * @mac_finish: finish a major reconfiguration of the interface. * @mac_link_down: take the link down. * @mac_link_up: allow the link to come up. + * @mac_disable_tx_lpi: disable LPI. + * @mac_enable_tx_lpi: enable and configure LPI. * * The individual methods are described more fully below. */ @@ -193,6 +208,9 @@ struct phylink_mac_ops { struct phy_device *phy, unsigned int mode, phy_interface_t interface, int speed, int duplex, bool tx_pause, bool rx_pause); + void (*mac_disable_tx_lpi)(struct phylink_config *config); + int (*mac_enable_tx_lpi)(struct phylink_config *config, u32 timer, + bool tx_clk_stop); }; #if 0 /* For kernel-doc purposes only. */ @@ -343,23 +361,29 @@ int mac_finish(struct phylink_config *config, unsigned int mode, phy_interface_t iface); /** - * mac_link_down() - take the link down + * mac_link_down() - notification that the link has gone down * @config: a pointer to a &struct phylink_config. * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * - * If @mode is not an in-band negotiation mode (as defined by - * phylink_autoneg_inband()), force the link down and disable any - * Energy Efficient Ethernet MAC configuration. Interface type - * selection must be done in mac_config(). + * Notifies the MAC that the link has gone down. This will not be called + * unless mac_link_up() has been previously called. + * + * The MAC should stop processing packets for transmission and reception. + * phylink will have called netif_carrier_off() to notify the networking + * stack that the link has gone down, so MAC drivers should not make this + * call. + * + * If @mode is %MLO_AN_INBAND, then this function must not prevent the + * link coming up. */ void mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface); /** - * mac_link_up() - allow the link to come up + * mac_link_up() - notification that the link has come up * @config: a pointer to a &struct phylink_config. - * @phy: any attached phy + * @phy: any attached phy (deprecated - please use LPI interfaces) * @mode: link autonegotiation mode * @interface: link &typedef phy_interface_t mode * @speed: link speed @@ -367,7 +391,10 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * @tx_pause: link transmit pause enablement status * @rx_pause: link receive pause enablement status * - * Configure the MAC for an established link. + * Notifies the MAC that the link has come up, and the parameters of the + * link as seen from the MACs point of view. If mac_link_up() has been + * called previously, there will be an intervening call to mac_link_down() + * before this method will be subsequently called. * * @speed, @duplex, @tx_pause and @rx_pause indicate the finalised link * settings, and should be used to configure the MAC block appropriately @@ -379,23 +406,51 @@ void mac_link_down(struct phylink_config *config, unsigned int mode, * that the user wishes to override the pause settings, and this should * be allowed when considering the implementation of this method. * - * If in-band negotiation mode is disabled, allow the link to come up. If - * @phy is non-%NULL, configure Energy Efficient Ethernet by calling - * phy_init_eee() and perform appropriate MAC configuration for EEE. + * Once configured, the MAC may begin to process packets for transmission + * and reception. + * * Interface type selection must be done in mac_config(). */ void mac_link_up(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, int speed, int duplex, bool tx_pause, bool rx_pause); + +/** + * mac_disable_tx_lpi() - disable LPI generation at the MAC + * @config: a pointer to a &struct phylink_config. + * + * Disable generation of LPI at the MAC, effectively preventing the MAC + * from indicating that it is idle. + */ +void mac_disable_tx_lpi(struct phylink_config *config); + +/** + * mac_enable_tx_lpi() - configure and enable LPI generation at the MAC + * @config: a pointer to a &struct phylink_config. + * @timer: LPI timeout in microseconds. + * @tx_clk_stop: allow xMII transmit clock to be stopped during LPI + * + * Configure the LPI timeout accordingly. This will only be called when + * the link is already up, to cater for situations where the hardware + * needs to be programmed according to the link speed. + * + * Enable LPI generation at the MAC, and configure whether the xMII transmit + * clock may be stopped. + * + * Returns: 0 on success. Please consult with rmk before returning an error. + */ +int mac_enable_tx_lpi(struct phylink_config *config, u32 timer, + bool tx_clk_stop); #endif struct phylink_pcs_ops; /** * struct phylink_pcs - PHYLINK PCS instance + * @supported_interfaces: describing which PHY_INTERFACE_MODE_xxx + * are supported by this PCS. * @ops: a pointer to the &struct phylink_pcs_ops structure * @phylink: pointer to &struct phylink_config - * @neg_mode: provide PCS neg mode via "mode" argument * @poll: poll the PCS for link changes * @rxc_always_on: The MAC driver requires the reference clock * to always be on. Standalone PCS drivers which @@ -409,9 +464,9 @@ struct phylink_pcs_ops; * the PCS driver. */ struct phylink_pcs { + DECLARE_PHY_INTERFACE_MASK(supported_interfaces); const struct phylink_pcs_ops *ops; struct phylink *phylink; - bool neg_mode; bool poll; bool rxc_always_on; }; @@ -419,6 +474,7 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. * @pcs_validate: validate the link configuration. + * @pcs_inband_caps: query inband support for interface mode. * @pcs_enable: enable the PCS. * @pcs_disable: disable the PCS. * @pcs_pre_config: pre-mac_config method (for errata) @@ -428,19 +484,25 @@ struct phylink_pcs { * @pcs_an_restart: restart 802.3z BaseX autonegotiation. * @pcs_link_up: program the PCS for the resolved link configuration * (where necessary). + * @pcs_disable_eee: optional notification to PCS that EEE has been disabled + * at the MAC. + * @pcs_enable_eee: optional notification to PCS that EEE will be enabled at + * the MAC. * @pcs_pre_init: configure PCS components necessary for MAC hardware * initialization e.g. RX clock for stmmac. */ struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); + unsigned int (*pcs_inband_caps)(struct phylink_pcs *pcs, + phy_interface_t interface); int (*pcs_enable)(struct phylink_pcs *pcs); void (*pcs_disable)(struct phylink_pcs *pcs); void (*pcs_pre_config)(struct phylink_pcs *pcs, phy_interface_t interface); int (*pcs_post_config)(struct phylink_pcs *pcs, phy_interface_t interface); - void (*pcs_get_state)(struct phylink_pcs *pcs, + void (*pcs_get_state)(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state); int (*pcs_config)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, @@ -449,6 +511,8 @@ struct phylink_pcs_ops { void (*pcs_an_restart)(struct phylink_pcs *pcs); void (*pcs_link_up)(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); + void (*pcs_disable_eee)(struct phylink_pcs *pcs); + void (*pcs_enable_eee)(struct phylink_pcs *pcs); int (*pcs_pre_init)(struct phylink_pcs *pcs); }; @@ -471,6 +535,20 @@ int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); /** + * pcs_inband_caps - query PCS in-band capabilities for interface mode. + * @pcs: a pointer to a &struct phylink_pcs. + * @interface: interface mode to be queried + * + * Returns zero if it is unknown what in-band signalling is supported by the + * PHY (e.g. because the PHY driver doesn't implement the method.) Otherwise, + * returns a bit mask of the LINK_INBAND_* values from + * &enum link_inband_signalling to describe which inband modes are supported + * for this interface mode. + */ +unsigned int pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface); + +/** * pcs_enable() - enable the PCS. * @pcs: a pointer to a &struct phylink_pcs. */ @@ -485,6 +563,7 @@ void pcs_disable(struct phylink_pcs *pcs); /** * pcs_get_state() - Read the current inband link state from the hardware * @pcs: a pointer to a &struct phylink_pcs. + * @neg_mode: link negotiation mode (PHYLINK_PCS_NEG_xxx) * @state: a pointer to a &struct phylink_link_state. * * Read the current inband link state from the MAC PCS, reporting the @@ -493,8 +572,11 @@ void pcs_disable(struct phylink_pcs *pcs); * negotiation completion state in @state->an_complete, and link up state * in @state->link. If possible, @state->lp_advertising should also be * populated. + * + * Note that the @neg_mode parameter is always the PHYLINK_PCS_NEG_xxx + * state, not MLO_AN_xxx. */ -void pcs_get_state(struct phylink_pcs *pcs, +void pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state); /** @@ -522,6 +604,14 @@ void pcs_get_state(struct phylink_pcs *pcs, * The %neg_mode argument should be tested via the phylink_mode_*() family of * functions, or for PCS that set pcs->neg_mode true, should be tested * against the PHYLINK_PCS_NEG_* definitions. + * + * pcs_config() will be called when configuration of the PCS is required + * or when the advertisement is possibly updated. It must not unnecessarily + * disrupt an established link. + * + * When an autonegotiation restart is required for 802.3z modes, .pcs_config() + * should return a positive non-zero integer (e.g. 1) to indicate to phylink + * to call the pcs_an_restart() method. */ int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, @@ -557,6 +647,22 @@ void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex); /** + * pcs_disable_eee() - Disable EEE at the PCS + * @pcs: a pointer to a &struct phylink_pcs + * + * Optional method informing the PCS that EEE has been disabled at the MAC. + */ +void pcs_disable_eee(struct phylink_pcs *pcs); + +/** + * pcs_enable_eee() - Enable EEE at the PCS + * @pcs: a pointer to a &struct phylink_pcs + * + * Optional method informing the PCS that EEE is about to be enabled at the MAC. + */ +void pcs_enable_eee(struct phylink_pcs *pcs); + +/** * pcs_pre_init() - Configure PCS components necessary for MAC initialization * @pcs: a pointer to a &struct phylink_pcs. * @@ -609,7 +715,11 @@ int phylink_pcs_pre_init(struct phylink *pl, struct phylink_pcs *pcs); void phylink_start(struct phylink *); void phylink_stop(struct phylink *); +void phylink_rx_clk_stop_block(struct phylink *); +void phylink_rx_clk_stop_unblock(struct phylink *); + void phylink_suspend(struct phylink *pl, bool mac_wol); +void phylink_prepare_resume(struct phylink *pl); void phylink_resume(struct phylink *pl); void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *); @@ -625,7 +735,6 @@ void phylink_ethtool_get_pauseparam(struct phylink *, int phylink_ethtool_set_pauseparam(struct phylink *, struct ethtool_pauseparam *); int phylink_get_eee_err(struct phylink *); -int phylink_init_eee(struct phylink *, bool); int phylink_ethtool_get_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_ethtool_set_eee(struct phylink *link, struct ethtool_keee *eee); int phylink_mii_ioctl(struct phylink *, struct ifreq *, int); @@ -668,9 +777,22 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface) } } +/** + * phylink_mac_implements_lpi() - determine if MAC implements LPI ops + * @ops: phylink_mac_ops structure + * + * Returns true if the phylink MAC operations structure indicates that the + * LPI operations have been implemented, false otherwise. + */ +static inline bool phylink_mac_implements_lpi(const struct phylink_mac_ops *ops) +{ + return ops && ops->mac_disable_tx_lpi && ops->mac_enable_tx_lpi; +} + void phylink_mii_c22_pcs_decode_state(struct phylink_link_state *state, - u16 bmsr, u16 lpa); + unsigned int neg_mode, u16 bmsr, u16 lpa); void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, + unsigned int neg_mode, struct phylink_link_state *state); int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface, const unsigned long *advertising); diff --git a/include/linux/pid.h b/include/linux/pid.h index a3aad9b4074c..453ae6d8a68d 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -59,6 +59,7 @@ struct pid spinlock_t lock; struct dentry *stashed; u64 ino; + struct rb_node pidfs_node; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -68,6 +69,7 @@ struct pid struct upid numbers[]; }; +extern seqcount_spinlock_t pidmap_lock_seq; extern struct pid init_struct_pid; struct file; @@ -75,7 +77,7 @@ struct file; struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file); void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) @@ -99,16 +101,13 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); * these helpers must be called with the tasklist_lock write-held. */ extern void attach_pid(struct task_struct *task, enum pid_type); -extern void detach_pid(struct task_struct *task, enum pid_type); -extern void change_pid(struct task_struct *task, enum pid_type, - struct pid *pid); +void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type); +void change_pid(struct pid **pids, struct task_struct *task, enum pid_type, + struct pid *pid); extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); -extern int pid_max; -extern int pid_max_min, pid_max_max; - /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. @@ -130,6 +129,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *); extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, size_t set_tid_size); extern void free_pid(struct pid *pid); +void free_pids(struct pid **pids); extern void disable_pid_allocation(struct pid_namespace *ns); /* diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index f9f9931e02d6..7c67a5811199 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -30,6 +30,7 @@ struct pid_namespace { struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; + int pid_max; struct pid_namespace *parent; #ifdef CONFIG_BSD_PROCESS_ACCT struct fs_pin *bacct; @@ -38,9 +39,14 @@ struct pid_namespace { struct ucounts *ucounts; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) + struct work_struct work; +#ifdef CONFIG_SYSCTL + struct ctl_table_set set; + struct ctl_table_header *sysctls; +#if defined(CONFIG_MEMFD_CREATE) int memfd_noexec_scope; #endif +#endif } __randomize_layout; extern struct pid_namespace init_pid_ns; @@ -117,6 +123,8 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pid_idr_init(void); +int register_pidns_sysctls(struct pid_namespace *pidns); +void unregister_pidns_sysctls(struct pid_namespace *pidns); static inline bool task_is_in_init_pid_ns(struct task_struct *tsk) { diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 75bdf9807802..77e7db194914 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -2,7 +2,19 @@ #ifndef _LINUX_PID_FS_H #define _LINUX_PID_FS_H +struct coredump_params; + struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); +void pidfs_add_pid(struct pid *pid); +void pidfs_remove_pid(struct pid *pid); +void pidfs_exit(struct task_struct *tsk); +#ifdef CONFIG_COREDUMP +void pidfs_coredump(const struct coredump_params *cprm); +#endif +extern const struct dentry_operations pidfs_dentry_operations; +int pidfs_register_pid(struct pid *pid); +void pidfs_get_pid(struct pid *pid); +void pidfs_put_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h index 673e96df453b..25620229b1d6 100644 --- a/include/linux/pinctrl/machine.h +++ b/include/linux/pinctrl/machine.h @@ -149,14 +149,18 @@ struct pinctrl_map { #define PIN_MAP_CONFIGS_GROUP_HOG_DEFAULT(dev, grp, cfgs) \ PIN_MAP_CONFIGS_GROUP(dev, PINCTRL_STATE_DEFAULT, dev, grp, cfgs) +struct device; struct pinctrl_map; #ifdef CONFIG_PINCTRL -extern int pinctrl_register_mappings(const struct pinctrl_map *map, - unsigned int num_maps); -extern void pinctrl_unregister_mappings(const struct pinctrl_map *map); -extern void pinctrl_provide_dummies(void); +int pinctrl_register_mappings(const struct pinctrl_map *map, + unsigned int num_maps); +int devm_pinctrl_register_mappings(struct device *dev, + const struct pinctrl_map *map, + unsigned int num_maps); +void pinctrl_unregister_mappings(const struct pinctrl_map *map); +void pinctrl_provide_dummies(void); #else static inline int pinctrl_register_mappings(const struct pinctrl_map *map, @@ -165,6 +169,13 @@ static inline int pinctrl_register_mappings(const struct pinctrl_map *map, return 0; } +static inline int devm_pinctrl_register_mappings(struct device *dev, + const struct pinctrl_map *map, + unsigned int num_maps) +{ + return 0; +} + static inline void pinctrl_unregister_mappings(const struct pinctrl_map *map) { } diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 53cfde98433d..1bcf071b860e 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -232,4 +232,8 @@ static inline int pinconf_generic_dt_node_to_map_all(struct pinctrl_dev *pctldev PIN_MAP_TYPE_INVALID); } +int pinconf_generic_dt_node_to_map_pinmux(struct pinctrl_dev *pctldev, + struct device_node *np, + struct pinctrl_map **map, + unsigned int *num_maps); #endif /* __LINUX_PINCTRL_PINCONF_GENERIC_H */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8ff23bf5a819..9d42d473d201 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -31,6 +31,33 @@ struct pipe_buffer { unsigned long private; }; +/* + * Really only alpha needs 32-bit fields, but + * might as well do it for 64-bit architectures + * since that's what we've historically done, + * and it makes 'head_tail' always be a simple + * 'unsigned long'. + */ +#ifdef CONFIG_64BIT +typedef unsigned int pipe_index_t; +#else +typedef unsigned short pipe_index_t; +#endif + +/* + * We have to declare this outside 'struct pipe_inode_info', + * but then we can't use 'union pipe_index' for an anonymous + * union, so we end up having to duplicate this declaration + * below. Annoying. + */ +union pipe_index { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; +}; + /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing @@ -38,6 +65,7 @@ struct pipe_buffer { * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @head_tail: unsigned long union of @head and @tail * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) @@ -58,8 +86,16 @@ struct pipe_buffer { struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; - unsigned int head; - unsigned int tail; + + /* This has to match the 'union pipe_index' above */ + union { + unsigned long head_tail; + struct { + pipe_index_t head; + pipe_index_t tail; + }; + }; + unsigned int max_usage; unsigned int ring_size; unsigned int nr_accounted; @@ -72,7 +108,7 @@ struct pipe_inode_info { #ifdef CONFIG_WATCH_QUEUE bool note_loss; #endif - struct page *tmp_page; + struct page *tmp_page[2]; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; @@ -141,23 +177,23 @@ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) } /** - * pipe_empty - Return true if the pipe is empty + * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline bool pipe_empty(unsigned int head, unsigned int tail) +static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { - return head == tail; + return (pipe_index_t)(head - tail); } /** - * pipe_occupancy - Return number of slots used in the pipe + * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ -static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) +static inline bool pipe_empty(unsigned int head, unsigned int tail) { - return head - tail; + return !pipe_occupancy(head, tail); } /** @@ -173,6 +209,33 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, } /** + * pipe_is_full - Return true if the pipe is full + * @pipe: the pipe + */ +static inline bool pipe_is_full(const struct pipe_inode_info *pipe) +{ + return pipe_full(pipe->head, pipe->tail, pipe->max_usage); +} + +/** + * pipe_is_empty - Return true if the pipe is empty + * @pipe: the pipe + */ +static inline bool pipe_is_empty(const struct pipe_inode_info *pipe) +{ + return pipe_empty(pipe->head, pipe->tail); +} + +/** + * pipe_buf_usage - Return how many pipe buffers are in use + * @pipe: the pipe + */ +static inline unsigned int pipe_buf_usage(const struct pipe_inode_info *pipe) +{ + return pipe_occupancy(pipe->head, pipe->tail); +} + +/** * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring * @pipe: The pipe to access * @slot: The slot of interest @@ -245,15 +308,6 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } -static inline void pipe_discard_from(struct pipe_inode_info *pipe, - unsigned int old_head) -{ - unsigned int mask = pipe->ring_size - 1; - - while (pipe->head > old_head) - pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); -} - /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE diff --git a/include/linux/platform_data/clk-davinci-pll.h b/include/linux/platform_data/clk-davinci-pll.h deleted file mode 100644 index e55dab1d578b..000000000000 --- a/include/linux/platform_data/clk-davinci-pll.h +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PLL clock driver for TI Davinci SoCs - * - * Copyright (C) 2018 David Lechner <david@lechnology.com> - */ - -#ifndef __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ -#define __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ - -#include <linux/regmap.h> - -/** - * davinci_pll_platform_data - * @cfgchip: CFGCHIP syscon regmap - */ -struct davinci_pll_platform_data { - struct regmap *cfgchip; -}; - -#endif /* __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ */ diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index b3c4993e656e..1f4e4f2b89bb 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -5044,8 +5044,12 @@ struct ec_response_pd_status { #define PD_EVENT_POWER_CHANGE BIT(1) #define PD_EVENT_IDENTITY_RECEIVED BIT(2) #define PD_EVENT_DATA_SWAP BIT(3) +#define PD_EVENT_TYPEC BIT(4) +#define PD_EVENT_PPM BIT(5) +#define PD_EVENT_INIT BIT(6) + struct ec_response_host_event_status { - uint32_t status; /* PD MCU host event status */ + uint32_t status; /* PD MCU host event status */ } __ec_align4; /* Set USB type-C port role and muxes */ @@ -6105,6 +6109,29 @@ struct ec_response_typec_vdm_response { #undef VDO_MAX_SIZE +/* + * UCSI OPM-PPM commands + * + * These commands are used for communication between OPM and PPM. + * Only UCSI3.0 is tested. + */ + +#define EC_CMD_UCSI_PPM_SET 0x0140 + +/* The data size is stored in the host command protocol header. */ +struct ec_params_ucsi_ppm_set { + uint16_t offset; + uint8_t data[]; +} __ec_align2; + +#define EC_CMD_UCSI_PPM_GET 0x0141 + +/* For 'GET' sub-commands, data will be returned as a raw payload. */ +struct ec_params_ucsi_ppm_get { + uint16_t offset; + uint8_t size; +} __ec_align2; + /*****************************************************************************/ /* The command range 0x200-0x2FF is reserved for Rotor. */ diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index b34ed0cc1f8d..3ec24f445c29 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -42,6 +42,11 @@ #define EC_MAX_RESPONSE_OVERHEAD 32 /* + * ACPI notify value for MKBP host event. + */ +#define ACPI_NOTIFY_CROS_EC_MKBP 0x80 + +/* * EC panic is not covered by the standard (0-F) ACPI notify values. * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF * device specific ACPI notify range. @@ -246,6 +251,8 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev, int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev, struct cros_ec_command *msg); +int cros_ec_rwsig_continue(struct cros_ec_device *ec_dev); + int cros_ec_query_all(struct cros_ec_device *ec_dev); int cros_ec_get_next_event(struct cros_ec_device *ec_dev, diff --git a/include/linux/platform_data/huawei-gaokun-ec.h b/include/linux/platform_data/huawei-gaokun-ec.h new file mode 100644 index 000000000000..faa15d315128 --- /dev/null +++ b/include/linux/platform_data/huawei-gaokun-ec.h @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Huawei Matebook E Go Embedded Controller + * + * Copyright (C) 2024-2025 Pengyu Luo <mitltlatltl@gmail.com> + */ + +#ifndef __HUAWEI_GAOKUN_EC_H__ +#define __HUAWEI_GAOKUN_EC_H__ + +#define GAOKUN_UCSI_CCI_SIZE 4 +#define GAOKUN_UCSI_MSGI_SIZE 16 +#define GAOKUN_UCSI_READ_SIZE (GAOKUN_UCSI_CCI_SIZE + GAOKUN_UCSI_MSGI_SIZE) +#define GAOKUN_UCSI_WRITE_SIZE 24 /* 8B CTRL, 16B MSGO */ + +#define GAOKUN_UCSI_NO_PORT_UPDATE (-1) + +#define GAOKUN_SMART_CHARGE_DATA_SIZE 4 /* mode, delay, start, end */ + +/* -------------------------------------------------------------------------- */ + +struct gaokun_ec; +struct gaokun_ucsi_reg; +struct notifier_block; + +#define GAOKUN_MOD_NAME "huawei_gaokun_ec" +#define GAOKUN_DEV_PSY "psy" +#define GAOKUN_DEV_UCSI "ucsi" + +/* -------------------------------------------------------------------------- */ +/* Common API */ + +int gaokun_ec_register_notify(struct gaokun_ec *ec, + struct notifier_block *nb); +void gaokun_ec_unregister_notify(struct gaokun_ec *ec, + struct notifier_block *nb); + +int gaokun_ec_read(struct gaokun_ec *ec, const u8 *req, + size_t resp_len, u8 *resp); +int gaokun_ec_write(struct gaokun_ec *ec, const u8 *req); +int gaokun_ec_read_byte(struct gaokun_ec *ec, const u8 *req, u8 *byte); + +/* -------------------------------------------------------------------------- */ +/* API for PSY */ + +int gaokun_ec_psy_multi_read(struct gaokun_ec *ec, u8 reg, + size_t resp_len, u8 *resp); + +static inline int gaokun_ec_psy_read_byte(struct gaokun_ec *ec, + u8 reg, u8 *byte) +{ + return gaokun_ec_psy_multi_read(ec, reg, sizeof(*byte), byte); +} + +static inline int gaokun_ec_psy_read_word(struct gaokun_ec *ec, + u8 reg, u16 *word) +{ + return gaokun_ec_psy_multi_read(ec, reg, sizeof(*word), (u8 *)word); +} + +int gaokun_ec_psy_get_smart_charge(struct gaokun_ec *ec, + u8 resp[GAOKUN_SMART_CHARGE_DATA_SIZE]); +int gaokun_ec_psy_set_smart_charge(struct gaokun_ec *ec, + const u8 req[GAOKUN_SMART_CHARGE_DATA_SIZE]); + +int gaokun_ec_psy_get_smart_charge_enable(struct gaokun_ec *ec, bool *on); +int gaokun_ec_psy_set_smart_charge_enable(struct gaokun_ec *ec, bool on); + +/* -------------------------------------------------------------------------- */ +/* API for UCSI */ + +int gaokun_ec_ucsi_read(struct gaokun_ec *ec, u8 resp[GAOKUN_UCSI_READ_SIZE]); +int gaokun_ec_ucsi_write(struct gaokun_ec *ec, + const u8 req[GAOKUN_UCSI_WRITE_SIZE]); + +int gaokun_ec_ucsi_get_reg(struct gaokun_ec *ec, struct gaokun_ucsi_reg *ureg); +int gaokun_ec_ucsi_pan_ack(struct gaokun_ec *ec, int port_id); + +#endif /* __HUAWEI_GAOKUN_EC_H__ */ diff --git a/include/linux/platform_data/i2c-davinci.h b/include/linux/platform_data/i2c-davinci.h deleted file mode 100644 index 98967df07468..000000000000 --- a/include/linux/platform_data/i2c-davinci.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * DaVinci I2C controller platform_device info - * - * Author: Vladimir Barinov, MontaVista Software, Inc. <source@mvista.com> - * - * 2007 (c) MontaVista Software, Inc. This file is licensed under - * the terms of the GNU General Public License version 2. This program - * is licensed "as is" without any warranty of any kind, whether express - * or implied. -*/ - -#ifndef __ASM_ARCH_I2C_H -#define __ASM_ARCH_I2C_H - -/* All frequencies are expressed in kHz */ -struct davinci_i2c_platform_data { - unsigned int bus_freq; /* standard bus frequency (kHz) */ - unsigned int bus_delay; /* post-transaction delay (usec) */ - bool gpio_recovery; /* Use GPIO recovery method */ - bool has_pfunc; /* Chip has a ICPFUNC register */ -}; - -/* for board setup code */ -void davinci_init_i2c(struct davinci_i2c_platform_data *); - -#endif /* __ASM_ARCH_I2C_H */ diff --git a/include/linux/platform_data/keyscan-davinci.h b/include/linux/platform_data/keyscan-davinci.h deleted file mode 100644 index 260d596ba0af..000000000000 --- a/include/linux/platform_data/keyscan-davinci.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 Texas Instruments, Inc - * - * Author: Miguel Aguilar <miguel.aguilar@ridgerun.com> - */ - -#ifndef DAVINCI_KEYSCAN_H -#define DAVINCI_KEYSCAN_H - -#include <linux/io.h> - -enum davinci_matrix_types { - DAVINCI_KEYSCAN_MATRIX_4X4, - DAVINCI_KEYSCAN_MATRIX_5X3, -}; - -struct davinci_ks_platform_data { - int (*device_enable)(struct device *dev); - unsigned short *keymap; - u32 keymapsize; - u8 rep:1; - u8 strobe; - u8 interval; - u8 matrix_type; -}; - -#endif - diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h index 0b9f81a6f753..f6cca7a035c7 100644 --- a/include/linux/platform_data/mlxreg.h +++ b/include/linux/platform_data/mlxreg.h @@ -209,7 +209,7 @@ struct mlxreg_core_platform_data { * @items: same type components with the hotplug capability; * @irq: platform interrupt number; * @regmap: register map of parent device; - * @counter: number of the components with the hotplug capability; + * @count: number of the components with the hotplug capability; * @cell: location of top aggregation interrupt register; * @mask: top aggregation interrupt common mask; * @cell_low: location of low aggregation interrupt register; @@ -224,7 +224,7 @@ struct mlxreg_core_hotplug_platform_data { struct mlxreg_core_item *items; int irq; void *regmap; - int counter; + int count; u32 cell; u32 mask; u32 cell_low; diff --git a/include/linux/platform_data/syscon.h b/include/linux/platform_data/syscon.h deleted file mode 100644 index 2c089dd3e2bd..000000000000 --- a/include/linux/platform_data/syscon.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PLATFORM_DATA_SYSCON_H -#define PLATFORM_DATA_SYSCON_H - -struct syscon_platform_data { - const char *label; -}; - -#endif diff --git a/include/linux/platform_data/x86/amd-fch.h b/include/linux/platform_data/x86/amd-fch.h new file mode 100644 index 000000000000..2cf5153edbc2 --- /dev/null +++ b/include/linux/platform_data/x86/amd-fch.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_FCH_H_ +#define _ASM_X86_AMD_FCH_H_ + +#define FCH_PM_BASE 0xFED80300 + +/* Register offsets from PM base: */ +#define FCH_PM_DECODEEN 0x00 +#define FCH_PM_DECODEEN_SMBUS0SEL GENMASK(20, 19) +#define FCH_PM_SCRATCH 0x80 +#define FCH_PM_S5_RESET_STATUS 0xC0 + +#endif /* _ASM_X86_AMD_FCH_H_ */ diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 783e2a336861..8a515179113d 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -157,9 +157,28 @@ #define ASUS_WMI_DSTS_MAX_BRIGTH_MASK 0x0000FF00 #define ASUS_WMI_DSTS_LIGHTBAR_MASK 0x0000000F +enum asus_ally_mcu_hack { + ASUS_WMI_ALLY_MCU_HACK_INIT, + ASUS_WMI_ALLY_MCU_HACK_ENABLED, + ASUS_WMI_ALLY_MCU_HACK_DISABLED, +}; + #if IS_REACHABLE(CONFIG_ASUS_WMI) +void set_ally_mcu_hack(enum asus_ally_mcu_hack status); +void set_ally_mcu_powersave(bool enabled); +int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval); int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval); #else +static inline void set_ally_mcu_hack(enum asus_ally_mcu_hack status) +{ +} +static inline void set_ally_mcu_powersave(bool enabled) +{ +} +static inline int asus_wmi_set_devstate(u32 dev_id, u32 ctrl_param, u32 *retval) +{ + return -ENODEV; +} static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval) { diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h new file mode 100644 index 000000000000..78276a11c48d --- /dev/null +++ b/include/linux/platform_data/x86/int3472.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel INT3472 ACPI camera sensor power-management support + * + * Author: Dan Scally <djrscally@gmail.com> + */ + +#ifndef __PLATFORM_DATA_X86_INT3472_H +#define __PLATFORM_DATA_X86_INT3472_H + +#include <linux/clk-provider.h> +#include <linux/gpio/machine.h> +#include <linux/leds.h> +#include <linux/regulator/driver.h> +#include <linux/regulator/machine.h> +#include <linux/types.h> + +/* FIXME drop this once the I2C_DEV_NAME_FORMAT macro has been added to include/linux/i2c.h */ +#ifndef I2C_DEV_NAME_FORMAT +#define I2C_DEV_NAME_FORMAT "i2c-%s" +#endif + +/* PMIC GPIO Types */ +#define INT3472_GPIO_TYPE_RESET 0x00 +#define INT3472_GPIO_TYPE_POWERDOWN 0x01 +#define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b +#define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c +#define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d +#define INT3472_GPIO_TYPE_HANDSHAKE 0x12 + +#define INT3472_PDEV_MAX_NAME_LEN 23 +#define INT3472_MAX_SENSOR_GPIOS 3 +#define INT3472_MAX_REGULATORS 3 + +/* E.g. "avdd\0" */ +#define GPIO_SUPPLY_NAME_LENGTH 5 +/* 12 chars for acpi_dev_name() + "-", e.g. "ABCD1234:00-" */ +#define GPIO_REGULATOR_NAME_LENGTH (12 + GPIO_SUPPLY_NAME_LENGTH) +/* lower- and upper-case mapping */ +#define GPIO_REGULATOR_SUPPLY_MAP_COUNT 2 +/* + * Ensure the GPIO is driven low/high for at least 2 ms before changing. + * + * 2 ms has been chosen because it is the minimum time ovXXXX sensors need to + * have their reset line driven logical high to properly register a reset. + */ +#define GPIO_REGULATOR_ENABLE_TIME (2 * USEC_PER_MSEC) +#define GPIO_REGULATOR_OFF_ON_DELAY (2 * USEC_PER_MSEC) + +#define INT3472_LED_MAX_NAME_LEN 32 + +#define CIO2_SENSOR_SSDB_MCLKSPEED_OFFSET 86 + +#define INT3472_REGULATOR(_name, _ops, _enable_time, _off_on_delay) \ + (const struct regulator_desc) { \ + .name = _name, \ + .type = REGULATOR_VOLTAGE, \ + .ops = _ops, \ + .owner = THIS_MODULE, \ + .enable_time = _enable_time, \ + .off_on_delay = _off_on_delay, \ + } + +#define to_int3472_clk(hw) \ + container_of(hw, struct int3472_clock, clk_hw) + +#define to_int3472_device(clk) \ + container_of(clk, struct int3472_discrete_device, clock) + +struct acpi_device; +struct dmi_system_id; +struct i2c_client; +struct platform_device; + +struct int3472_cldb { + u8 version; + /* + * control logic type + * 0: UNKNOWN + * 1: DISCRETE(CRD-D) + * 2: PMIC TPS68470 + * 3: PMIC uP6641 + */ + u8 control_logic_type; + u8 control_logic_id; + u8 sensor_card_sku; + u8 reserved[10]; + u8 clock_source; + u8 reserved2[17]; +}; + +struct int3472_discrete_quirks { + /* For models where AVDD GPIO is shared between sensors */ + const char *avdd_second_sensor; +}; + +struct int3472_gpio_regulator { + /* SUPPLY_MAP_COUNT * 2 to make room for second sensor mappings */ + struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; + char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; + char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; + struct gpio_desc *ena_gpio; + struct regulator_dev *rdev; + struct regulator_desc rdesc; +}; + +struct int3472_discrete_device { + struct acpi_device *adev; + struct device *dev; + struct acpi_device *sensor; + const char *sensor_name; + + struct int3472_gpio_regulator regulators[INT3472_MAX_REGULATORS]; + + struct int3472_clock { + struct clk *clk; + struct clk_hw clk_hw; + struct clk_lookup *cl; + struct gpio_desc *ena_gpio; + u32 frequency; + u8 imgclk_index; + } clock; + + struct int3472_pled { + struct led_classdev classdev; + struct led_lookup_data lookup; + char name[INT3472_LED_MAX_NAME_LEN]; + struct gpio_desc *gpio; + } pled; + + struct int3472_discrete_quirks quirks; + + unsigned int ngpios; /* how many GPIOs have we seen */ + unsigned int n_sensor_gpios; /* how many have we mapped to sensor */ + unsigned int n_regulator_gpios; /* how many have we mapped to a regulator */ + struct gpiod_lookup_table gpios; +}; + +extern const struct dmi_system_id skl_int3472_discrete_quirks[]; + +union acpi_object *skl_int3472_get_acpi_buffer(struct acpi_device *adev, + char *id); +int skl_int3472_fill_cldb(struct acpi_device *adev, struct int3472_cldb *cldb); +int skl_int3472_get_sensor_adev_and_name(struct device *dev, + struct acpi_device **sensor_adev_ret, + const char **name_ret); + +int int3472_discrete_parse_crs(struct int3472_discrete_device *int3472); +void int3472_discrete_cleanup(struct int3472_discrete_device *int3472); + +int skl_int3472_register_gpio_clock(struct int3472_discrete_device *int3472, + struct gpio_desc *gpio); +int skl_int3472_register_dsm_clock(struct int3472_discrete_device *int3472); +void skl_int3472_unregister_clock(struct int3472_discrete_device *int3472); + +int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, + struct gpio_desc *gpio, + unsigned int enable_time, + const char *supply_name, + const char *second_sensor); +void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472); + +int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gpio_desc *gpio); +void skl_int3472_unregister_pled(struct int3472_discrete_device *int3472); + +#endif diff --git a/include/linux/platform_data/x86/intel_pmc_ipc.h b/include/linux/platform_data/x86/intel_pmc_ipc.h new file mode 100644 index 000000000000..1d34435b7001 --- /dev/null +++ b/include/linux/platform_data/x86/intel_pmc_ipc.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel Core SoC Power Management Controller Header File + * + * Copyright (c) 2025, Intel Corporation. + * All Rights Reserved. + * + */ +#ifndef INTEL_PMC_IPC_H +#define INTEL_PMC_IPC_H +#include <linux/acpi.h> + +#define IPC_SOC_REGISTER_ACCESS 0xAA +#define IPC_SOC_SUB_CMD_READ 0x00 +#define IPC_SOC_SUB_CMD_WRITE 0x01 +#define PMC_IPCS_PARAM_COUNT 7 +#define VALID_IPC_RESPONSE 5 + +struct pmc_ipc_cmd { + u32 cmd; + u32 sub_cmd; + u32 size; + u32 wbuf[4]; +}; + +struct pmc_ipc_rbuf { + u32 buf[4]; +}; + +/** + * intel_pmc_ipc() - PMC IPC Mailbox accessor + * @ipc_cmd: Prepared input command to send + * @rbuf: Allocated array for returned IPC data + * + * Return: 0 on success. Non-zero on mailbox error + */ +static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf *rbuf) +{ +#ifdef CONFIG_ACPI + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object params[PMC_IPCS_PARAM_COUNT] = { + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + }; + struct acpi_object_list arg_list = { PMC_IPCS_PARAM_COUNT, params }; + union acpi_object *obj; + int status; + + if (!ipc_cmd || !rbuf) + return -EINVAL; + + /* + * 0: IPC Command + * 1: IPC Sub Command + * 2: Size + * 3-6: Write Buffer for offset + */ + params[0].integer.value = ipc_cmd->cmd; + params[1].integer.value = ipc_cmd->sub_cmd; + params[2].integer.value = ipc_cmd->size; + params[3].integer.value = ipc_cmd->wbuf[0]; + params[4].integer.value = ipc_cmd->wbuf[1]; + params[5].integer.value = ipc_cmd->wbuf[2]; + params[6].integer.value = ipc_cmd->wbuf[3]; + + status = acpi_evaluate_object(NULL, "\\IPCS", &arg_list, &buffer); + if (ACPI_FAILURE(status)) + return -ENODEV; + + obj = buffer.pointer; + + if (obj && obj->type == ACPI_TYPE_PACKAGE && + obj->package.count == VALID_IPC_RESPONSE) { + const union acpi_object *objs = obj->package.elements; + + if ((u8)objs[0].integer.value != 0) + return -EINVAL; + + rbuf->buf[0] = objs[1].integer.value; + rbuf->buf[1] = objs[2].integer.value; + rbuf->buf[2] = objs[3].integer.value; + rbuf->buf[3] = objs[4].integer.value; + } else { + return -EINVAL; + } + + return 0; +#else + return -ENODEV; +#endif /* CONFIG_ACPI */ +} + +#endif /* INTEL_PMC_IPC_H */ diff --git a/include/linux/platform_data/x86/pwm-lpss.h b/include/linux/platform_data/x86/pwm-lpss.h index 752c06b47cc8..f0349edb47f4 100644 --- a/include/linux/platform_data/x86/pwm-lpss.h +++ b/include/linux/platform_data/x86/pwm-lpss.h @@ -15,9 +15,36 @@ struct pwm_lpss_boardinfo { unsigned int npwm; unsigned long base_unit_bits; /* - * Some versions of the IP may stuck in the state machine if enable - * bit is not set, and hence update bit will show busy status till - * the reset. For the rest it may be otherwise. + * NOTE: + * Intel Broxton, Apollo Lake, and Gemini Lake have different programming flow. + * + * Initial Enable or First Activation + * 1. Program the base unit and on time divisor values. + * 2. Set the software update bit. + * 3. Poll in a loop on the PWMCTRL bit until software update bit is cleared.+ + * 4. Enable the PWM output by setting PWM Enable. + * 5. Repeat the above steps for the next PWM Module. + * + * Dynamic update while PWM is Enabled + * 1. Program the base unit and on-time divisor values. + * 2. Set the software update bit. + * 3. Repeat the above steps for the next PWM module. + * + * + After setting PWMCTRL register's SW update bit, hardware automatically + * deasserts the SW update bit after a brief delay. It was observed that + * setting of PWM enable is typically done via read-modify-write of the PWMCTRL + * register. If there is no/little delay between setting software update bit + * and setting enable bit via read-modify-write, it is possible that the read + * could return with software enable as 1. In that case, the last write to set + * enable to 1 could also set sw_update to 1. If this happens, sw_update gets + * stuck and the driver code can hang as it explicitly waits for sw_update bit + * to be 0 after setting the enable bit to 1. To avoid this race condition, + * SW should poll on the software update bit to make sure that it is 0 before + * doing the read-modify-write to set the enable bit to 1. + * + * Also, we noted that if sw_update bit was set in step #1 above then when it + * is set again in step #2, sw_update bit never gets cleared and the flow hangs. + * As such, we need to make sure that sw_update bit is 0 when doing step #1. */ bool bypass; /* diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index f5492ed413f3..a299225ab92e 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -9,6 +9,7 @@ #ifndef _PLATFORM_PROFILE_H_ #define _PLATFORM_PROFILE_H_ +#include <linux/device.h> #include <linux/bitops.h> /* @@ -23,20 +24,37 @@ enum platform_profile_option { PLATFORM_PROFILE_BALANCED, PLATFORM_PROFILE_BALANCED_PERFORMANCE, PLATFORM_PROFILE_PERFORMANCE, + PLATFORM_PROFILE_CUSTOM, PLATFORM_PROFILE_LAST, /*must always be last */ }; -struct platform_profile_handler { - unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; - int (*profile_get)(struct platform_profile_handler *pprof, - enum platform_profile_option *profile); - int (*profile_set)(struct platform_profile_handler *pprof, - enum platform_profile_option profile); +/** + * struct platform_profile_ops - platform profile operations + * @probe: Callback to setup choices available to the new class device. These + * choices will only be enforced when setting a new profile, not when + * getting the current one. + * @hidden_choices: Callback to setup choices that are not visible to the user + * but can be set by the driver. + * @profile_get: Callback that will be called when showing the current platform + * profile in sysfs. + * @profile_set: Callback that will be called when storing a new platform + * profile in sysfs. + */ +struct platform_profile_ops { + int (*probe)(void *drvdata, unsigned long *choices); + int (*hidden_choices)(void *drvdata, unsigned long *choices); + int (*profile_get)(struct device *dev, enum platform_profile_option *profile); + int (*profile_set)(struct device *dev, enum platform_profile_option profile); }; -int platform_profile_register(struct platform_profile_handler *pprof); -int platform_profile_remove(void); +struct device *platform_profile_register(struct device *dev, const char *name, + void *drvdata, + const struct platform_profile_ops *ops); +void platform_profile_remove(struct device *dev); +struct device *devm_platform_profile_register(struct device *dev, const char *name, + void *drvdata, + const struct platform_profile_ops *ops); int platform_profile_cycle(void); -void platform_profile_notify(void); +void platform_profile_notify(struct device *dev); #endif /*_PLATFORM_PROFILE_H_*/ diff --git a/include/linux/pldmfw.h b/include/linux/pldmfw.h index 0fc831338226..f5047983004f 100644 --- a/include/linux/pldmfw.h +++ b/include/linux/pldmfw.h @@ -125,9 +125,17 @@ struct pldmfw_ops; * a pointer to their own data, used to implement the device specific * operations. */ + +enum pldmfw_update_mode { + PLDMFW_UPDATE_MODE_FULL, + PLDMFW_UPDATE_MODE_SINGLE_COMPONENT, +}; + struct pldmfw { const struct pldmfw_ops *ops; struct device *dev; + u16 component_identifier; + enum pldmfw_update_mode mode; }; bool pldmfw_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record); diff --git a/include/linux/pm.h b/include/linux/pm.h index e7f0260f15ad..f0bd8fbae4f2 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -384,12 +384,8 @@ const struct dev_pm_ops name = { \ #ifdef CONFIG_PM #define _EXPORT_DEV_PM_OPS(name, license, ns) _EXPORT_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, "ns") #else #define _EXPORT_DEV_PM_OPS(name, license, ns) _DISCARD_PM_OPS(name, license, ns) -#define EXPORT_PM_FN_GPL(name) -#define EXPORT_PM_FN_NS_GPL(name, ns) #endif #ifdef CONFIG_PM_SLEEP @@ -570,7 +566,8 @@ const struct dev_pm_ops name = { \ { .event = PM_EVENT_AUTO_RESUME, }) #define PMSG_IS_AUTO(msg) (((msg).event & PM_EVENT_AUTO) != 0) - +#define PMSG_NO_WAKEUP(msg) (((msg).event & \ + (PM_EVENT_FREEZE | PM_EVENT_QUIESCE)) != 0) /* * Device run-time power management status. * @@ -600,6 +597,7 @@ enum rpm_status { RPM_RESUMING, RPM_SUSPENDED, RPM_SUSPENDING, + RPM_BLOCKED, }; /* @@ -681,7 +679,8 @@ struct dev_pm_info { bool wakeup_path:1; bool syscore:1; bool no_pm_callbacks:1; /* Owned by the PM core */ - bool async_in_progress:1; /* Owned by the PM core */ + bool work_in_progress:1; /* Owned by the PM core */ + bool smart_suspend:1; /* Owned by the PM core */ bool must_resume:1; /* Owned by the PM core */ bool may_skip_resume:1; /* Set by subsystems */ #else @@ -840,10 +839,8 @@ extern int pm_generic_resume_early(struct device *dev); extern int pm_generic_resume_noirq(struct device *dev); extern int pm_generic_resume(struct device *dev); extern int pm_generic_freeze_noirq(struct device *dev); -extern int pm_generic_freeze_late(struct device *dev); extern int pm_generic_freeze(struct device *dev); extern int pm_generic_thaw_noirq(struct device *dev); -extern int pm_generic_thaw_early(struct device *dev); extern int pm_generic_thaw(struct device *dev); extern int pm_generic_restore_noirq(struct device *dev); extern int pm_generic_restore_early(struct device *dev); @@ -885,10 +882,8 @@ static inline void dpm_for_each_dev(void *data, void (*fn)(struct device *, void #define pm_generic_resume_noirq NULL #define pm_generic_resume NULL #define pm_generic_freeze_noirq NULL -#define pm_generic_freeze_late NULL #define pm_generic_freeze NULL #define pm_generic_thaw_noirq NULL -#define pm_generic_thaw_early NULL #define pm_generic_thaw NULL #define pm_generic_restore_noirq NULL #define pm_generic_restore_early NULL diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index 68669ce18720..c3b46fa358d3 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -41,9 +41,7 @@ extern int pm_clk_create(struct device *dev); extern void pm_clk_destroy(struct device *dev); extern int pm_clk_add(struct device *dev, const char *con_id); extern int pm_clk_add_clk(struct device *dev, struct clk *clk); -extern int of_pm_clk_add_clk(struct device *dev, const char *name); extern int of_pm_clk_add_clks(struct device *dev); -extern void pm_clk_remove(struct device *dev, const char *con_id); extern void pm_clk_remove_clk(struct device *dev, struct clk *clk); extern int pm_clk_suspend(struct device *dev); extern int pm_clk_resume(struct device *dev); @@ -76,9 +74,6 @@ static inline int of_pm_clk_add_clks(struct device *dev) { return -EINVAL; } -static inline void pm_clk_remove(struct device *dev, const char *con_id) -{ -} #define pm_clk_suspend NULL #define pm_clk_resume NULL static inline void pm_clk_remove_clk(struct device *dev, struct clk *clk) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 45646bfcaf1a..0b18160901a2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -142,16 +142,21 @@ struct genpd_governor_data { bool max_off_time_changed; ktime_t next_wakeup; ktime_t next_hrtimer; + ktime_t last_enter; + bool reflect_residency; bool cached_power_down_ok; bool cached_power_down_state_idx; }; struct genpd_power_state { + const char *name; s64 power_off_latency_ns; s64 power_on_latency_ns; s64 residency_ns; u64 usage; u64 rejected; + u64 above; + u64 below; struct fwnode_handle *fwnode; u64 idle_time; void *data; @@ -260,6 +265,7 @@ struct generic_pm_domain_data { unsigned int rpm_pstate; unsigned int opp_token; bool hw_mode; + bool rpm_always_on; void *data; }; @@ -283,6 +289,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); int pm_genpd_remove(struct generic_pm_domain *genpd); +void pm_genpd_inc_rejected(struct generic_pm_domain *genpd, + unsigned int state_idx); struct device *dev_to_genpd_dev(struct device *dev); int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); @@ -292,6 +300,7 @@ ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); void dev_pm_genpd_synced_poweroff(struct device *dev); int dev_pm_genpd_set_hwmode(struct device *dev, bool enable); bool dev_pm_genpd_get_hwmode(struct device *dev); +int dev_pm_genpd_rpm_always_on(struct device *dev, bool on); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -333,6 +342,10 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd) return -EOPNOTSUPP; } +static inline void pm_genpd_inc_rejected(struct generic_pm_domain *genpd, + unsigned int state_idx) +{ } + static inline struct device *dev_to_genpd_dev(struct device *dev) { return ERR_PTR(-EOPNOTSUPP); @@ -375,6 +388,11 @@ static inline bool dev_pm_genpd_get_hwmode(struct device *dev) return false; } +static inline int dev_pm_genpd_rpm_always_on(struct device *dev, bool on) +{ + return -EOPNOTSUPP; +} + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 568183e3e641..cf477beae4bb 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OPP_H__ #define __LINUX_OPP_H__ +#include <linux/cleanup.h> #include <linux/energy_model.h> #include <linux/err.h> #include <linux/notifier.h> @@ -100,8 +101,11 @@ struct dev_pm_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); +struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); +unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index); + unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *supplies); @@ -158,6 +162,7 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, unsigned int *bw, int index); +struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp); void dev_pm_opp_put(struct dev_pm_opp *opp); int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); @@ -192,6 +197,7 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) void dev_pm_opp_remove_table(struct device *dev); void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask); int dev_pm_opp_sync_regulators(struct device *dev); + #else static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) { @@ -203,8 +209,18 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device * return ERR_PTR(-EOPNOTSUPP); } +static inline struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) +{ + return opp_table; +} + static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {} +static inline unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index) +{ + return 0; +} + static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp) { return 0; @@ -334,6 +350,11 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } +static inline struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp) +{ + return opp; +} + static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} static inline int @@ -560,6 +581,12 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta } #endif +/* Scope based cleanup macro for OPP reference counting */ +DEFINE_FREE(put_opp, struct dev_pm_opp *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put(_T)) + +/* Scope based cleanup macro for OPP table reference counting */ +DEFINE_FREE(put_opp_table, struct opp_table *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put_opp_table(_T)) + /* OPP Configuration helpers */ static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, @@ -691,4 +718,14 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) return dev_pm_opp_get_freq_indexed(opp, 0); } +static inline int dev_pm_opp_set_level(struct device *dev, unsigned int level) +{ + struct dev_pm_opp *opp __free(put_opp) = dev_pm_opp_find_level_exact(dev, level); + + if (IS_ERR(opp)) + return PTR_ERR(opp); + + return dev_pm_opp_set_opp(dev, opp); +} + #endif /* __LINUX_OPP_H__ */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d39dc863f612..e7cb70fcc0af 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -66,6 +66,7 @@ static inline bool queue_pm_work(struct work_struct *work) extern int pm_generic_runtime_suspend(struct device *dev); extern int pm_generic_runtime_resume(struct device *dev); +extern bool pm_runtime_need_not_resume(struct device *dev); extern int pm_runtime_force_suspend(struct device *dev); extern int pm_runtime_force_resume(struct device *dev); @@ -77,6 +78,8 @@ extern int pm_runtime_get_if_in_use(struct device *dev); extern int pm_schedule_suspend(struct device *dev, unsigned int delay); extern int __pm_runtime_set_status(struct device *dev, unsigned int status); extern int pm_runtime_barrier(struct device *dev); +extern bool pm_runtime_block_if_disabled(struct device *dev); +extern void pm_runtime_unblock(struct device *dev); extern void pm_runtime_enable(struct device *dev); extern void __pm_runtime_disable(struct device *dev, bool check_resume); extern void pm_runtime_allow(struct device *dev); @@ -93,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); +int devm_pm_runtime_set_active_enabled(struct device *dev); extern int devm_pm_runtime_enable(struct device *dev); +int devm_pm_runtime_get_noresume(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. @@ -197,6 +202,17 @@ static inline bool pm_runtime_enabled(struct device *dev) } /** + * pm_runtime_blocked - Check if runtime PM enabling is blocked. + * @dev: Target device. + * + * Do not call this function outside system suspend/resume code paths. + */ +static inline bool pm_runtime_blocked(struct device *dev) +{ + return dev->power.last_status == RPM_BLOCKED; +} + +/** * pm_runtime_has_no_callbacks - Check if runtime PM callbacks may be present. * @dev: Target device. * @@ -241,6 +257,7 @@ static inline bool queue_pm_work(struct work_struct *work) { return false; } static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; } static inline int pm_generic_runtime_resume(struct device *dev) { return 0; } +static inline bool pm_runtime_need_not_resume(struct device *dev) {return true; } static inline int pm_runtime_force_suspend(struct device *dev) { return 0; } static inline int pm_runtime_force_resume(struct device *dev) { return 0; } @@ -271,12 +288,17 @@ static inline int pm_runtime_get_if_active(struct device *dev) static inline int __pm_runtime_set_status(struct device *dev, unsigned int status) { return 0; } static inline int pm_runtime_barrier(struct device *dev) { return 0; } +static inline bool pm_runtime_block_if_disabled(struct device *dev) { return true; } +static inline void pm_runtime_unblock(struct device *dev) {} static inline void pm_runtime_enable(struct device *dev) {} static inline void __pm_runtime_disable(struct device *dev, bool c) {} +static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } +static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} @@ -448,6 +470,8 @@ static inline int pm_runtime_put(struct device *dev) return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC); } +DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T)) + /** * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0. * @dev: Target device. @@ -556,11 +580,18 @@ static inline int pm_runtime_set_suspended(struct device *dev) * pm_runtime_disable - Disable runtime PM for a device. * @dev: Target device. * - * Prevent the runtime PM framework from working with @dev (by incrementing its - * "blocking" counter). + * Prevent the runtime PM framework from working with @dev by incrementing its + * "disable" counter. + * + * If the counter is zero when this function runs and there is a pending runtime + * resume request for @dev, it will be resumed. If the counter is still zero at + * that point, all of the pending runtime PM requests for @dev will be canceled + * and all runtime PM operations in progress involving it will be waited for to + * complete. * - * For each invocation of this function for @dev there must be a matching - * pm_runtime_enable() call in order for runtime PM to be enabled for it. + * For each invocation of this function for @dev, there must be a matching + * pm_runtime_enable() call, so that runtime PM is eventually enabled for it + * again. */ static inline void pm_runtime_disable(struct device *dev) { diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h index d9642c6cf852..25b63ed51b76 100644 --- a/include/linux/pm_wakeirq.h +++ b/include/linux/pm_wakeirq.h @@ -10,6 +10,7 @@ extern int dev_pm_set_wake_irq(struct device *dev, int irq); extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq); extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq); extern void dev_pm_clear_wake_irq(struct device *dev); +extern int devm_pm_set_wake_irq(struct device *dev, int irq); #else /* !CONFIG_PM */ @@ -32,5 +33,10 @@ static inline void dev_pm_clear_wake_irq(struct device *dev) { } +static inline int devm_pm_set_wake_irq(struct device *dev, int irq) +{ + return 0; +} + #endif /* CONFIG_PM */ #endif /* _LINUX_PM_WAKEIRQ_H */ diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 222f7530806c..c838b4a30f87 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -95,10 +95,6 @@ static inline void device_set_wakeup_path(struct device *dev) } /* drivers/base/power/wakeup.c */ -extern struct wakeup_source *wakeup_source_create(const char *name); -extern void wakeup_source_destroy(struct wakeup_source *ws); -extern void wakeup_source_add(struct wakeup_source *ws); -extern void wakeup_source_remove(struct wakeup_source *ws); extern struct wakeup_source *wakeup_source_register(struct device *dev, const char *name); extern void wakeup_source_unregister(struct wakeup_source *ws); @@ -129,17 +125,6 @@ static inline bool device_can_wakeup(struct device *dev) return dev->power.can_wakeup; } -static inline struct wakeup_source *wakeup_source_create(const char *name) -{ - return NULL; -} - -static inline void wakeup_source_destroy(struct wakeup_source *ws) {} - -static inline void wakeup_source_add(struct wakeup_source *ws) {} - -static inline void wakeup_source_remove(struct wakeup_source *ws) {} - static inline struct wakeup_source *wakeup_source_register(struct device *dev, const char *name) { @@ -205,17 +190,17 @@ static inline void device_set_awake_path(struct device *dev) static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) { - return pm_wakeup_ws_event(ws, msec, false); + pm_wakeup_ws_event(ws, msec, false); } static inline void pm_wakeup_event(struct device *dev, unsigned int msec) { - return pm_wakeup_dev_event(dev, msec, false); + pm_wakeup_dev_event(dev, msec, false); } static inline void pm_wakeup_hard_event(struct device *dev) { - return pm_wakeup_dev_event(dev, 0, true); + pm_wakeup_dev_event(dev, 0, true); } /** @@ -240,4 +225,21 @@ static inline int device_init_wakeup(struct device *dev, bool enable) return 0; } +static void device_disable_wakeup(void *dev) +{ + device_init_wakeup(dev, false); +} + +/** + * devm_device_init_wakeup - Resource managed device wakeup initialization. + * @dev: Device to handle. + * + * This function is the devm managed version of device_init_wakeup(dev, true). + */ +static inline int devm_device_init_wakeup(struct device *dev) +{ + device_init_wakeup(dev, true); + return devm_add_action_or_reset(dev, device_disable_wakeup, dev); +} + #endif /* _LINUX_PM_WAKEUP_H */ diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h index fa9f08164c36..884040e1383b 100644 --- a/include/linux/pmbus.h +++ b/include/linux/pmbus.h @@ -73,6 +73,20 @@ */ #define PMBUS_USE_COEFFICIENTS_CMD BIT(5) +/* + * PMBUS_OP_PROTECTED + * Set if the chip OPERATION command is protected and protection is not + * determined by the standard WRITE_PROTECT command. + */ +#define PMBUS_OP_PROTECTED BIT(6) + +/* + * PMBUS_VOUT_PROTECTED + * Set if the chip VOUT_COMMAND command is protected and protection is not + * determined by the standard WRITE_PROTECT command. + */ +#define PMBUS_VOUT_PROTECTED BIT(7) + struct pmbus_platform_data { u32 flags; /* Device specific flags */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b7a7158aaf65..23fe3eaf242d 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -290,7 +290,7 @@ static inline void pnp_set_drvdata(struct pnp_dev *pdev, void *data) } struct pnp_fixup { - char id[7]; + char id[8]; void (*quirk_function) (struct pnp_dev *dev); /* fixup function */ }; diff --git a/include/linux/poison.h b/include/linux/poison.h index 331a9a996fa8..8ca2235f78d5 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -70,6 +70,10 @@ #define KEY_DESTROY 0xbd /********** net/core/page_pool.c **********/ +/* + * page_pool uses additional free bits within this value to store data, see the + * definition of PP_DMA_INDEX_MASK in mm.h + */ #define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA) /********** net/core/skbuff.c **********/ diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index ef8619f48920..a500d3160fe8 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -95,10 +95,13 @@ struct posix_clock { * struct posix_clock_context - represents clock file operations context * * @clk: Pointer to the clock + * @fp: Pointer to the file used to open the clock * @private_clkdata: Pointer to user data * * Drivers should use struct posix_clock_context during specific character - * device file operation methods to access the posix clock. + * device file operation methods to access the posix clock. In particular, + * the file pointer can be used to verify correct access mode for ioctl() + * calls. * * Drivers can store a private data structure during the open operation * if they have specific information that is required in other file @@ -106,6 +109,7 @@ struct posix_clock { */ struct posix_clock_context { struct posix_clock *clk; + struct file *fp; void *private_clkdata; }; diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index f11f10c97bd9..dd48c64b605e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -114,6 +114,7 @@ bool posixtimer_init_sigqueue(struct sigqueue *q); void posixtimer_send_sigqueue(struct k_itimer *tmr); bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq); void posixtimer_free_timer(struct k_itimer *timer); +long posixtimer_create_prctl(unsigned long ctrl); /* Init task static initializer */ #define INIT_CPU_TIMERBASE(b) { \ @@ -140,6 +141,7 @@ static inline void posixtimer_rearm_itimer(struct task_struct *p) { } static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq) { return false; } static inline void posixtimer_free_timer(struct k_itimer *timer) { } +static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; } #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK @@ -177,23 +179,26 @@ static inline void posix_cputimers_init_work(void) { } * @rcu: RCU head for freeing the timer. */ struct k_itimer { - struct hlist_node list; - struct hlist_node ignored_list; + /* 1st cacheline contains read-mostly fields */ struct hlist_node t_hash; - spinlock_t it_lock; - const struct k_clock *kclock; - clockid_t it_clock; + struct hlist_node list; timer_t it_id; + clockid_t it_clock; + int it_sigev_notify; + enum pid_type it_pid_type; + struct signal_struct *it_signal; + const struct k_clock *kclock; + + /* 2nd cacheline and above contain fields which are modified regularly */ + spinlock_t it_lock; int it_status; bool it_sig_periodic; s64 it_overrun; s64 it_overrun_last; unsigned int it_signal_seq; unsigned int it_sigqueue_seq; - int it_sigev_notify; - enum pid_type it_pid_type; ktime_t it_interval; - struct signal_struct *it_signal; + struct hlist_node ignored_list; union { struct pid *it_pid; struct task_struct *it_process; @@ -210,7 +215,7 @@ struct k_itimer { } alarm; } it; struct rcu_head rcu; -}; +} ____cacheline_aligned_in_smp; void run_posix_cpu_timers(void); void posix_cpu_timers_exit(struct task_struct *task); @@ -240,6 +245,13 @@ static inline void posixtimer_sigqueue_putref(struct sigqueue *q) posixtimer_putref(tmr); } + +static inline bool posixtimer_valid(const struct k_itimer *timer) +{ + unsigned long val = (unsigned long)timer->it_signal; + + return !(val & 0x1UL); +} #else /* CONFIG_POSIX_TIMERS */ static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { } static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index e2d47eb1a7f3..62d497763e25 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -27,11 +27,16 @@ struct posix_acl_entry { }; struct posix_acl { - refcount_t a_refcount; - unsigned int a_count; - struct rcu_head a_rcu; + /* New members MUST be added within the struct_group() macro below. */ + struct_group_tagged(posix_acl_hdr, hdr, + refcount_t a_refcount; + unsigned int a_count; + struct rcu_head a_rcu; + ); struct posix_acl_entry a_entries[] __counted_by(a_count); }; +static_assert(offsetof(struct posix_acl, a_entries) == sizeof(struct posix_acl_hdr), + "struct member likely outside of struct_group_tagged()"); #define FOREACH_ACL_ENTRY(pa, acl, pe) \ for(pa=(acl)->a_entries, pe=pa+(acl)->a_count; pa<pe; pa++) diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index 5180dc9f1706..d56e1276aafe 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -61,6 +61,8 @@ struct bq27xxx_device_info { struct bq27xxx_access_methods bus; struct bq27xxx_reg_cache cache; int charge_design_full; + int voltage_min_design; + int voltage_max_design; bool removed; unsigned long last_update; union power_supply_propval last_status; diff --git a/include/linux/power/max77705_charger.h b/include/linux/power/max77705_charger.h new file mode 100644 index 000000000000..fdec9af9c541 --- /dev/null +++ b/include/linux/power/max77705_charger.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Maxim MAX77705 definitions. + * + * Copyright (C) 2015 Samsung Electronics, Inc. + * Copyright (C) 2025 Dzmitry Sankouski <dsankouski@gmail.com> + */ + +#ifndef __MAX77705_CHARGER_H +#define __MAX77705_CHARGER_H __FILE__ + +/* MAX77705_CHG_REG_CHG_INT */ +#define MAX77705_BYP_I BIT(0) +#define MAX77705_INP_LIMIT_I BIT(1) +#define MAX77705_BATP_I BIT(2) +#define MAX77705_BAT_I BIT(3) +#define MAX77705_CHG_I BIT(4) +#define MAX77705_WCIN_I BIT(5) +#define MAX77705_CHGIN_I BIT(6) +#define MAX77705_AICL_I BIT(7) + +/* MAX77705_CHG_REG_CHG_INT_MASK */ +#define MAX77705_BYP_IM BIT(0) +#define MAX77705_INP_LIMIT_IM BIT(1) +#define MAX77705_BATP_IM BIT(2) +#define MAX77705_BAT_IM BIT(3) +#define MAX77705_CHG_IM BIT(4) +#define MAX77705_WCIN_IM BIT(5) +#define MAX77705_CHGIN_IM BIT(6) +#define MAX77705_AICL_IM BIT(7) + +/* MAX77705_CHG_REG_CHG_INT_OK */ +#define MAX77705_BYP_OK BIT(0) +#define MAX77705_DISQBAT_OK BIT(1) +#define MAX77705_BATP_OK BIT(2) +#define MAX77705_BAT_OK BIT(3) +#define MAX77705_CHG_OK BIT(4) +#define MAX77705_WCIN_OK BIT(5) +#define MAX77705_CHGIN_OK BIT(6) +#define MAX77705_AICL_OK BIT(7) + +/* MAX77705_CHG_REG_DETAILS_00 */ +#define MAX77705_BATP_DTLS BIT(0) +#define MAX77705_WCIN_DTLS GENMASK(4, 3) +#define MAX77705_WCIN_DTLS_SHIFT 3 +#define MAX77705_CHGIN_DTLS GENMASK(6, 5) +#define MAX77705_CHGIN_DTLS_SHIFT 5 + +/* MAX77705_CHG_REG_DETAILS_01 */ +#define MAX77705_CHG_DTLS GENMASK(3, 0) +#define MAX77705_CHG_DTLS_SHIFT 0 +#define MAX77705_BAT_DTLS GENMASK(6, 4) +#define MAX77705_BAT_DTLS_SHIFT 4 + +/* MAX77705_CHG_REG_DETAILS_02 */ +#define MAX77705_BYP_DTLS GENMASK(3, 0) +#define MAX77705_BYP_DTLS_SHIFT 0 + +/* MAX77705_CHG_REG_CNFG_00 */ +#define MAX77705_CHG_SHIFT 0 +#define MAX77705_UNO_SHIFT 1 +#define MAX77705_OTG_SHIFT 1 +#define MAX77705_BUCK_SHIFT 2 +#define MAX77705_BOOST_SHIFT 3 +#define MAX77705_WDTEN_SHIFT 4 +#define MAX77705_MODE_MASK GENMASK(3, 0) +#define MAX77705_CHG_MASK BIT(MAX77705_CHG_SHIFT) +#define MAX77705_UNO_MASK BIT(MAX77705_UNO_SHIFT) +#define MAX77705_OTG_MASK BIT(MAX77705_OTG_SHIFT) +#define MAX77705_BUCK_MASK BIT(MAX77705_BUCK_SHIFT) +#define MAX77705_BOOST_MASK BIT(MAX77705_BOOST_SHIFT) +#define MAX77705_WDTEN_MASK BIT(MAX77705_WDTEN_SHIFT) +#define MAX77705_UNO_CTRL (MAX77705_UNO_MASK | MAX77705_BOOST_MASK) +#define MAX77705_OTG_CTRL (MAX77705_OTG_MASK | MAX77705_BOOST_MASK) + +/* MAX77705_CHG_REG_CNFG_01 */ +#define MAX77705_FCHGTIME_SHIFT 0 +#define MAX77705_FCHGTIME_MASK GENMASK(2, 0) +#define MAX77705_CHG_RSTRT_SHIFT 4 +#define MAX77705_CHG_RSTRT_MASK GENMASK(5, 4) +#define MAX77705_FCHGTIME_DISABLE 0 +#define MAX77705_CHG_RSTRT_DISABLE 0x3 + +#define MAX77705_PQEN_SHIFT 7 +#define MAX77705_PQEN_MASK BIT(7) +#define MAX77705_CHG_PQEN_DISABLE 0 +#define MAX77705_CHG_PQEN_ENABLE 1 + +/* MAX77705_CHG_REG_CNFG_02 */ +#define MAX77705_OTG_ILIM_SHIFT 6 +#define MAX77705_OTG_ILIM_MASK GENMASK(7, 6) +#define MAX77705_OTG_ILIM_500 0 +#define MAX77705_OTG_ILIM_900 1 +#define MAX77705_OTG_ILIM_1200 2 +#define MAX77705_OTG_ILIM_1500 3 +#define MAX77705_CHG_CC GENMASK(5, 0) + +/* MAX77705_CHG_REG_CNFG_03 */ +#define MAX77705_TO_ITH_SHIFT 0 +#define MAX77705_TO_ITH_MASK GENMASK(2, 0) +#define MAX77705_TO_TIME_SHIFT 3 +#define MAX77705_TO_TIME_MASK GENMASK(5, 3) +#define MAX77705_SYS_TRACK_DIS_SHIFT 7 +#define MAX77705_SYS_TRACK_DIS_MASK BIT(7) +#define MAX77705_TO_ITH_150MA 0 +#define MAX77705_TO_TIME_30M 3 +#define MAX77705_SYS_TRACK_ENABLE 0 +#define MAX77705_SYS_TRACK_DISABLE 1 + +/* MAX77705_CHG_REG_CNFG_04 */ +#define MAX77705_CHG_MINVSYS_SHIFT 6 +#define MAX77705_CHG_MINVSYS_MASK GENMASK(7, 6) +#define MAX77705_CHG_PRM_SHIFT 0 +#define MAX77705_CHG_PRM_MASK GENMASK(5, 0) + +#define MAX77705_CHG_CV_PRM_SHIFT 0 +#define MAX77705_CHG_CV_PRM_MASK GENMASK(5, 0) + +/* MAX77705_CHG_REG_CNFG_05 */ +#define MAX77705_REG_B2SOVRC_SHIFT 0 +#define MAX77705_REG_B2SOVRC_MASK GENMASK(3, 0) +#define MAX77705_B2SOVRC_DISABLE 0 +#define MAX77705_B2SOVRC_4_5A 6 +#define MAX77705_B2SOVRC_4_8A 8 +#define MAX77705_B2SOVRC_5_0A 9 + +/* MAX77705_CHG_CNFG_06 */ +#define MAX77705_WDTCLR_SHIFT 0 +#define MAX77705_WDTCLR_MASK GENMASK(1, 0) +#define MAX77705_WDTCLR 1 +#define MAX77705_CHGPROT_MASK GENMASK(3, 2) +#define MAX77705_CHGPROT_UNLOCKED GENMASK(3, 2) +#define MAX77705_SLOWEST_LX_SLOPE GENMASK(6, 5) + +/* MAX77705_CHG_REG_CNFG_07 */ +#define MAX77705_CHG_FMBST 4 +#define MAX77705_REG_FMBST_SHIFT 2 +#define MAX77705_REG_FMBST_MASK BIT(MAX77705_REG_FMBST_SHIFT) +#define MAX77705_REG_FGSRC_SHIFT 1 +#define MAX77705_REG_FGSRC_MASK BIT(MAX77705_REG_FGSRC_SHIFT) + +/* MAX77705_CHG_REG_CNFG_08 */ +#define MAX77705_REG_FSW_SHIFT 0 +#define MAX77705_REG_FSW_MASK GENMASK(1, 0) +#define MAX77705_CHG_FSW_3MHz 0 +#define MAX77705_CHG_FSW_2MHz 1 +#define MAX77705_CHG_FSW_1_5MHz 2 + +/* MAX77705_CHG_REG_CNFG_09 */ +#define MAX77705_CHG_CHGIN_LIM_MASK GENMASK(6, 0) +#define MAX77705_CHG_EN_MASK BIT(7) +#define MAX77705_CHG_DISABLE 0 +#define MAX77705_CHARGER_CHG_CHARGING(_reg) \ + (((_reg) & MAX77705_CHG_EN_MASK) > 1) + + +/* MAX77705_CHG_REG_CNFG_10 */ +#define MAX77705_CHG_WCIN_LIM GENMASK(5, 0) + +/* MAX77705_CHG_REG_CNFG_11 */ +#define MAX77705_VBYPSET_SHIFT 0 +#define MAX77705_VBYPSET_MASK GENMASK(6, 0) + +/* MAX77705_CHG_REG_CNFG_12 */ +#define MAX77705_CHGINSEL_SHIFT 5 +#define MAX77705_CHGINSEL_MASK BIT(MAX77705_CHGINSEL_SHIFT) +#define MAX77705_WCINSEL_SHIFT 6 +#define MAX77705_WCINSEL_MASK BIT(MAX77705_WCINSEL_SHIFT) +#define MAX77705_VCHGIN_REG_MASK GENMASK(4, 3) +#define MAX77705_WCIN_REG_MASK GENMASK(2, 1) +#define MAX77705_REG_DISKIP_SHIFT 0 +#define MAX77705_REG_DISKIP_MASK BIT(MAX77705_REG_DISKIP_SHIFT) +/* REG=4.5V, UVLO=4.7V */ +#define MAX77705_VCHGIN_4_5 0 +/* REG=4.5V, UVLO=4.7V */ +#define MAX77705_WCIN_4_5 0 +#define MAX77705_DISABLE_SKIP 1 +#define MAX77705_AUTO_SKIP 0 + +/* uA */ +#define MAX77705_CURRENT_CHGIN_STEP 25000 +#define MAX77705_CURRENT_CHG_STEP 50000 +#define MAX77705_CURRENT_CHGIN_MIN 100000 +#define MAX77705_CURRENT_CHGIN_MAX 3200000 + +struct max77705_charger_data { + struct device *dev; + struct regmap *regmap; + struct power_supply_battery_info *bat_info; + struct workqueue_struct *wqueue; + struct work_struct chgin_work; + struct power_supply *psy_chg; +}; + +#endif /* __MAX77705_CHARGER_H */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index b98106e1a90f..0cca01b5607b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -15,6 +15,8 @@ #include <linux/device.h> #include <linux/workqueue.h> #include <linux/leds.h> +#include <linux/rwsem.h> +#include <linux/list.h> #include <linux/spinlock.h> #include <linux/notifier.h> @@ -40,7 +42,7 @@ enum { }; /* What algorithm is the charger using? */ -enum { +enum power_supply_charge_type { POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, POWER_SUPPLY_CHARGE_TYPE_NONE, POWER_SUPPLY_CHARGE_TYPE_TRICKLE, /* slow speed */ @@ -58,6 +60,7 @@ enum { POWER_SUPPLY_HEALTH_OVERHEAT, POWER_SUPPLY_HEALTH_DEAD, POWER_SUPPLY_HEALTH_OVERVOLTAGE, + POWER_SUPPLY_HEALTH_UNDERVOLTAGE, POWER_SUPPLY_HEALTH_UNSPEC_FAILURE, POWER_SUPPLY_HEALTH_COLD, POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE, @@ -68,6 +71,8 @@ enum { POWER_SUPPLY_HEALTH_COOL, POWER_SUPPLY_HEALTH_HOT, POWER_SUPPLY_HEALTH_NO_BATTERY, + POWER_SUPPLY_HEALTH_BLOWN_FUSE, + POWER_SUPPLY_HEALTH_CELL_IMBALANCE, }; enum { @@ -99,6 +104,7 @@ enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, POWER_SUPPLY_PROP_CHARGE_TYPE, + POWER_SUPPLY_PROP_CHARGE_TYPES, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, @@ -208,6 +214,7 @@ enum power_supply_usb_type { enum power_supply_charge_behaviour { POWER_SUPPLY_CHARGE_BEHAVIOUR_AUTO = 0, POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE, + POWER_SUPPLY_CHARGE_BEHAVIOUR_INHIBIT_CHARGE_AWAKE, POWER_SUPPLY_CHARGE_BEHAVIOUR_FORCE_DISCHARGE, }; @@ -245,6 +252,7 @@ struct power_supply_desc { const char *name; enum power_supply_type type; u8 charge_behaviours; + u32 charge_types; u32 usb_types; const enum power_supply_property *properties; size_t num_properties; @@ -269,7 +277,6 @@ struct power_supply_desc { int (*property_is_writeable)(struct power_supply *psy, enum power_supply_property psp); void (*external_power_changed)(struct power_supply *psy); - void (*set_charged)(struct power_supply *psy); /* * Set if thermal zone should not be created for this power supply. @@ -281,6 +288,29 @@ struct power_supply_desc { int use_for_apm; }; +struct power_supply_ext { + const char *const name; + u8 charge_behaviours; + u32 charge_types; + const enum power_supply_property *properties; + size_t num_properties; + + int (*get_property)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp, + union power_supply_propval *val); + int (*set_property)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp, + const union power_supply_propval *val); + int (*property_is_writeable)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp); +}; + struct power_supply { const struct power_supply_desc *desc; @@ -289,7 +319,6 @@ struct power_supply { char **supplied_from; size_t num_supplies; - struct device_node *of_node; /* Driver private data */ void *drv_data; @@ -300,10 +329,13 @@ struct power_supply { struct delayed_work deferred_register_work; spinlock_t changed_lock; bool changed; + bool update_groups; bool initialized; bool removing; atomic_t use_cnt; struct power_supply_battery_info *battery_info; + struct rw_semaphore extensions_sem; /* protects "extensions" */ + struct list_head extensions; #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd; struct thermal_cooling_device *tcd; @@ -318,6 +350,8 @@ struct power_supply { #endif }; +#define dev_to_psy(__dev) container_of_const(__dev, struct power_supply, dev) + /* * This is recommended structure to specify static power supply parameters. * Generic one, parametrizable for different power supplies. Power supply @@ -820,7 +854,6 @@ extern int power_supply_am_i_supplied(struct power_supply *psy); int power_supply_get_property_from_supplier(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); -extern int power_supply_set_battery_charged(struct power_supply *psy); static inline bool power_supply_supports_maintenance_charging(struct power_supply_battery_info *info) @@ -855,15 +888,23 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } extern int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val); +int power_supply_get_property_direct(struct power_supply *psy, enum power_supply_property psp, + union power_supply_propval *val); #if IS_ENABLED(CONFIG_POWER_SUPPLY) extern int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val); +int power_supply_set_property_direct(struct power_supply *psy, enum power_supply_property psp, + const union power_supply_propval *val); #else static inline int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { return 0; } +static inline int power_supply_set_property_direct(struct power_supply *psy, + enum power_supply_property psp, + const union power_supply_propval *val) +{ return 0; } #endif extern void power_supply_external_power_changed(struct power_supply *psy); @@ -878,10 +919,18 @@ devm_power_supply_register(struct device *parent, extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); +extern int __must_check +power_supply_register_extension(struct power_supply *psy, + const struct power_supply_ext *ext, + struct device *dev, + void *data); +extern void power_supply_unregister_extension(struct power_supply *psy, + const struct power_supply_ext *ext); + #define to_power_supply(device) container_of(device, struct power_supply, dev) extern void *power_supply_get_drvdata(struct power_supply *psy); -extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)); +extern int power_supply_for_each_psy(void *data, int (*fn)(struct power_supply *psy, void *data)); static inline bool power_supply_is_amp_property(enum power_supply_property psp) { @@ -944,6 +993,11 @@ ssize_t power_supply_charge_behaviour_show(struct device *dev, char *buf); int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf); +ssize_t power_supply_charge_types_show(struct device *dev, + unsigned int available_types, + enum power_supply_charge_type current_type, + char *buf); +int power_supply_charge_types_parse(unsigned int available_types, const char *buf); #else static inline ssize_t power_supply_charge_behaviour_show(struct device *dev, @@ -959,6 +1013,20 @@ static inline int power_supply_charge_behaviour_parse(unsigned int available_beh { return -EOPNOTSUPP; } + +static inline +ssize_t power_supply_charge_types_show(struct device *dev, + unsigned int available_types, + enum power_supply_charge_type current_type, + char *buf) +{ + return -EOPNOTSUPP; +} + +static inline int power_supply_charge_types_parse(unsigned int available_types, const char *buf) +{ + return -EOPNOTSUPP; +} #endif #endif /* __LINUX_POWER_SUPPLY_H__ */ diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h index 45e6e427ceb8..f73fbea0dbc2 100644 --- a/include/linux/ppp_channel.h +++ b/include/linux/ppp_channel.h @@ -42,8 +42,7 @@ struct ppp_channel { int hdrlen; /* amount of headroom channel needs */ void *ppp; /* opaque to channel */ int speed; /* transfer rate (bytes/second) */ - /* the following is not used at present */ - int latency; /* overhead time in milliseconds */ + bool direct_xmit; /* no qdisc, xmit directly */ }; #ifdef __KERNEL__ diff --git a/include/linux/pps_gen_kernel.h b/include/linux/pps_gen_kernel.h new file mode 100644 index 000000000000..6214c8aa2e02 --- /dev/null +++ b/include/linux/pps_gen_kernel.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PPS generator API kernel header + * + * Copyright (C) 2024 Rodolfo Giometti <giometti@enneenne.com> + */ + +#ifndef LINUX_PPS_GEN_KERNEL_H +#define LINUX_PPS_GEN_KERNEL_H + +#include <linux/pps_gen.h> +#include <linux/cdev.h> +#include <linux/device.h> + +/* + * Global defines + */ + +#define PPS_GEN_MAX_SOURCES 16 /* should be enough... */ + +struct pps_gen_device; + +/** + * struct pps_gen_source_info - the specific PPS generator info + * @use_system_clock: true, if the system clock is used to generate pulses + * @get_time: query the time stored into the generator clock + * @enable: enable/disable the PPS pulses generation + * + * This is the main generator struct where all needed information must be + * placed before calling the pps_gen_register_source(). + */ +struct pps_gen_source_info { + bool use_system_clock; + + int (*get_time)(struct pps_gen_device *pps_gen, + struct timespec64 *time); + int (*enable)(struct pps_gen_device *pps_gen, bool enable); + +/* private: internal use only */ + struct module *owner; + struct device *parent; /* for device_create */ +}; + +/* The main struct */ +struct pps_gen_device { + const struct pps_gen_source_info *info; /* PSS generator info */ + bool enabled; /* PSS generator status */ + + unsigned int event; + unsigned int sequence; + + unsigned int last_ev; /* last PPS event id */ + wait_queue_head_t queue; /* PPS event queue */ + + unsigned int id; /* PPS generator unique ID */ + struct cdev cdev; + struct device *dev; + struct fasync_struct *async_queue; /* fasync method */ + spinlock_t lock; +}; + +/* + * Global variables + */ + +extern const struct attribute_group *pps_gen_groups[]; + +/* + * Exported functions + */ + +extern struct pps_gen_device *pps_gen_register_source( + const struct pps_gen_source_info *info); +extern void pps_gen_unregister_source(struct pps_gen_device *pps_gen); +extern void pps_gen_event(struct pps_gen_device *pps_gen, + unsigned int event, void *data); + +#endif /* LINUX_PPS_GEN_KERNEL_H */ diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index c7abce28ed29..aab0aebb529e 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -52,6 +52,7 @@ struct pps_device { int current_mode; /* PPS mode at event time */ unsigned int last_ev; /* last PPS event id */ + unsigned int last_fetched_ev; /* last fetched PPS event id */ wait_queue_head_t queue; /* PPS event queue */ unsigned int id; /* PPS source unique ID */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h index ca86235ac15c..b0af8d4ef6e6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -319,6 +319,7 @@ do { \ #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; +struct task_struct; /** * preempt_ops - notifiers called when a task is preempted and rescheduled @@ -515,6 +516,8 @@ static inline bool preempt_model_rt(void) return IS_ENABLED(CONFIG_PREEMPT_RT); } +extern const char *preempt_model_str(void); + /* * Does the preemption model allow non-cooperative preemption? * diff --git a/include/linux/printk.h b/include/linux/printk.h index 4217a9f412b2..5b462029d03c 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -207,6 +207,7 @@ void printk_legacy_allow_panic_sync(void); extern bool nbcon_device_try_acquire(struct console *con); extern void nbcon_device_release(struct console *con); void nbcon_atomic_flush_unsafe(void); +bool pr_flush(int timeout_ms, bool reset_on_progress); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -315,6 +316,11 @@ static inline void nbcon_atomic_flush_unsafe(void) { } +static inline bool pr_flush(int timeout_ms, bool reset_on_progress) +{ + return true; +} + #endif bool this_cpu_in_panic(void); diff --git a/include/linux/prmt.h b/include/linux/prmt.h index 9c094294403f..c53ab287e932 100644 --- a/include/linux/prmt.h +++ b/include/linux/prmt.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/uuid.h> + #ifdef CONFIG_ACPI_PRMT void init_prmt(void); int acpi_call_prm_handler(guid_t handler_guid, void *param_buffer); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 0b2a89854440..703d0c76cc9a 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -20,10 +20,14 @@ enum { * If in doubt, ignore this flag. */ #ifdef MODULE - PROC_ENTRY_PERMANENT = 0U, + PROC_ENTRY_PERMANENT = 0U, #else - PROC_ENTRY_PERMANENT = 1U << 0, + PROC_ENTRY_PERMANENT = 1U << 0, #endif + + PROC_ENTRY_proc_read_iter = 1U << 1, + PROC_ENTRY_proc_compat_ioctl = 1U << 2, + PROC_ENTRY_proc_lseek = 1U << 3, }; struct proc_ops { diff --git a/include/linux/property.h b/include/linux/property.h index 61fc20e5f81f..f718dd4789e5 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -37,6 +37,7 @@ struct fwnode_handle *__dev_fwnode(struct device *dev); struct device *: __dev_fwnode)(dev) bool device_property_present(const struct device *dev, const char *propname); +bool device_property_read_bool(const struct device *dev, const char *propname); int device_property_read_u8_array(const struct device *dev, const char *propname, u8 *val, size_t nval); int device_property_read_u16_array(const struct device *dev, const char *propname, @@ -54,6 +55,8 @@ int device_property_match_string(const struct device *dev, bool fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname); +bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, + const char *propname); int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode, const char *propname, u8 *val, size_t nval); @@ -164,6 +167,10 @@ struct fwnode_handle *fwnode_get_next_available_child_node( for (child = fwnode_get_next_child_node(fwnode, NULL); child; \ child = fwnode_get_next_child_node(fwnode, child)) +#define fwnode_for_each_named_child_node(fwnode, child, name) \ + fwnode_for_each_child_node(fwnode, child) \ + if (!fwnode_name_eq(child, name)) { } else + #define fwnode_for_each_available_child_node(fwnode, child) \ for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\ child = fwnode_get_next_available_child_node(fwnode, child)) @@ -175,11 +182,19 @@ struct fwnode_handle *device_get_next_child_node(const struct device *dev, for (child = device_get_next_child_node(dev, NULL); child; \ child = device_get_next_child_node(dev, child)) +#define device_for_each_named_child_node(dev, child, name) \ + device_for_each_child_node(dev, child) \ + if (!fwnode_name_eq(child, name)) { } else + #define device_for_each_child_node_scoped(dev, child) \ for (struct fwnode_handle *child __free(fwnode_handle) = \ device_get_next_child_node(dev, NULL); \ child; child = device_get_next_child_node(dev, child)) +#define device_for_each_named_child_node_scoped(dev, child, name) \ + device_for_each_child_node_scoped(dev, child) \ + if (!fwnode_name_eq(child, name)) { } else + struct fwnode_handle *fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname); struct fwnode_handle *device_get_named_child_node(const struct device *dev, @@ -205,12 +220,19 @@ DEFINE_FREE(fwnode_handle, struct fwnode_handle *, fwnode_handle_put(_T)) int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index); int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name); -unsigned int device_get_child_node_count(const struct device *dev); +unsigned int fwnode_get_child_node_count(const struct fwnode_handle *fwnode); + +static inline unsigned int device_get_child_node_count(const struct device *dev) +{ + return fwnode_get_child_node_count(dev_fwnode(dev)); +} -static inline bool device_property_read_bool(const struct device *dev, - const char *propname) +unsigned int fwnode_get_named_child_node_count(const struct fwnode_handle *fwnode, + const char *name); +static inline unsigned int device_get_named_child_node_count(const struct device *dev, + const char *name) { - return device_property_present(dev, propname); + return fwnode_get_named_child_node_count(dev_fwnode(dev), name); } static inline int device_property_read_u8(const struct device *dev, @@ -263,12 +285,6 @@ static inline int device_property_string_array_count(const struct device *dev, return device_property_read_string_array(dev, propname, NULL, 0); } -static inline bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, - const char *propname) -{ - return fwnode_property_present(fwnode, propname); -} - static inline int fwnode_property_read_u8(const struct fwnode_handle *fwnode, const char *propname, u8 *val) { diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index df1592022d93..c773eeb92d04 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -5,15 +5,37 @@ #ifndef _LINUX_PSE_CONTROLLER_H #define _LINUX_PSE_CONTROLLER_H -#include <linux/ethtool.h> #include <linux/list.h> #include <uapi/linux/ethtool.h> /* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */ #define MAX_PI_CURRENT 1920000 +/* Maximum power in mW according to IEEE 802.3-2022 Table 145-16 */ +#define MAX_PI_PW 99900 struct phy_device; struct pse_controller_dev; +struct netlink_ext_ack; + +/* C33 PSE extended state and substate. */ +struct ethtool_c33_pse_ext_state_info { + enum ethtool_c33_pse_ext_state c33_pse_ext_state; + union { + enum ethtool_c33_pse_ext_substate_error_condition error_condition; + enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable; + enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted; + enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim; + enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected; + enum ethtool_c33_pse_ext_substate_power_not_available power_not_available; + enum ethtool_c33_pse_ext_substate_short_detected short_detected; + u32 __c33_pse_ext_substate; + }; +}; + +struct ethtool_c33_pse_pw_limit_range { + u32 min; + u32 max; +}; /** * struct pse_control_config - PSE control/channel configuration. @@ -29,7 +51,52 @@ struct pse_control_config { }; /** - * struct pse_control_status - PSE control/channel status. + * struct pse_admin_state - PSE operational state + * + * @podl_admin_state: operational state of the PoDL PSE + * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState + * @c33_admin_state: operational state of the PSE + * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState + */ +struct pse_admin_state { + enum ethtool_podl_pse_admin_state podl_admin_state; + enum ethtool_c33_pse_admin_state c33_admin_state; +}; + +/** + * struct pse_pw_status - PSE power detection status + * + * @podl_pw_status: power detection status of the PoDL PSE. + * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: + * @c33_pw_status: power detection status of the PSE. + * IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus: + */ +struct pse_pw_status { + enum ethtool_podl_pse_pw_d_status podl_pw_status; + enum ethtool_c33_pse_pw_d_status c33_pw_status; +}; + +/** + * struct pse_ext_state_info - PSE extended state information + * + * @c33_ext_state_info: extended state information of the PSE + */ +struct pse_ext_state_info { + struct ethtool_c33_pse_ext_state_info c33_ext_state_info; +}; + +/** + * struct pse_pw_limit_ranges - PSE power limit configuration range + * + * @c33_pw_limit_ranges: supported power limit configuration range. The driver + * is in charge of the memory allocation. + */ +struct pse_pw_limit_ranges { + struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges; +}; + +/** + * struct ethtool_pse_control_status - PSE control/channel status. * * @podl_admin_state: operational state of the PoDL PSE * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState @@ -47,11 +114,11 @@ struct pse_control_config { * @c33_avail_pw_limit: available power limit of the PSE in mW * IEEE 802.3-2022 145.2.5.4 pse_avail_pwr * @c33_pw_limit_ranges: supported power limit configuration range. The driver - * is in charge of the memory allocation. + * is in charge of the memory allocation * @c33_pw_limit_nb_ranges: number of supported power limit configuration * ranges */ -struct pse_control_status { +struct ethtool_pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; enum ethtool_c33_pse_admin_state c33_admin_state; @@ -67,23 +134,35 @@ struct pse_control_status { /** * struct pse_controller_ops - PSE controller driver callbacks * - * @ethtool_get_status: get PSE control status for ethtool interface * @setup_pi_matrix: setup PI matrix of the PSE controller - * @pi_is_enabled: Return 1 if the PSE PI is enabled, 0 if not. - * May also return negative errno. + * @pi_get_admin_state: Get the operational state of the PSE PI. This ops + * is mandatory. + * @pi_get_pw_status: Get the power detection status of the PSE PI. This + * ops is mandatory. + * @pi_get_ext_state: Get the extended state of the PSE PI. + * @pi_get_pw_class: Get the power class of the PSE PI. + * @pi_get_actual_pw: Get actual power of the PSE PI in mW. * @pi_enable: Configure the PSE PI as enabled. * @pi_disable: Configure the PSE PI as disabled. * @pi_get_voltage: Return voltage similarly to get_voltage regulator - * callback. - * @pi_get_pw_limit: Get the configured power limit of the PSE PI. - * @pi_set_pw_limit: Configure the power limit of the PSE PI. + * callback in uV. + * @pi_get_pw_limit: Get the configured power limit of the PSE PI in mW. + * @pi_set_pw_limit: Configure the power limit of the PSE PI in mW. + * @pi_get_pw_limit_ranges: Get the supported power limit configuration + * range. The driver is in charge of the memory + * allocation and should return the number of + * ranges. */ struct pse_controller_ops { - int (*ethtool_get_status)(struct pse_controller_dev *pcdev, - unsigned long id, struct netlink_ext_ack *extack, - struct pse_control_status *status); int (*setup_pi_matrix)(struct pse_controller_dev *pcdev); - int (*pi_is_enabled)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_admin_state)(struct pse_controller_dev *pcdev, int id, + struct pse_admin_state *admin_state); + int (*pi_get_pw_status)(struct pse_controller_dev *pcdev, int id, + struct pse_pw_status *pw_status); + int (*pi_get_ext_state)(struct pse_controller_dev *pcdev, int id, + struct pse_ext_state_info *ext_state_info); + int (*pi_get_pw_class)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_actual_pw)(struct pse_controller_dev *pcdev, int id); int (*pi_enable)(struct pse_controller_dev *pcdev, int id); int (*pi_disable)(struct pse_controller_dev *pcdev, int id); int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id); @@ -91,12 +170,15 @@ struct pse_controller_ops { int id); int (*pi_set_pw_limit)(struct pse_controller_dev *pcdev, int id, int max_mW); + int (*pi_get_pw_limit_ranges)(struct pse_controller_dev *pcdev, int id, + struct pse_pw_limit_ranges *pw_limit_ranges); }; struct module; struct device_node; struct of_phandle_args; struct pse_control; +struct ethtool_pse_control_status; /* PSE PI pairset pinout can either be Alternative A or Alternative B */ enum pse_pi_pairset_pinout { @@ -173,15 +255,13 @@ void pse_control_put(struct pse_control *psec); int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, - struct pse_control_status *status); + struct ethtool_pse_control_status *status); int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config); int pse_ethtool_set_pw_limit(struct pse_control *psec, struct netlink_ext_ack *extack, const unsigned int pw_limit); -int pse_ethtool_get_pw_limit(struct pse_control *psec, - struct netlink_ext_ack *extack); bool pse_has_podl(struct pse_control *psec); bool pse_has_c33(struct pse_control *psec); @@ -199,7 +279,7 @@ static inline void pse_control_put(struct pse_control *psec) static inline int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, - struct pse_control_status *status) + struct ethtool_pse_control_status *status) { return -EOPNOTSUPP; } @@ -218,12 +298,6 @@ static inline int pse_ethtool_set_pw_limit(struct pse_control *psec, return -EOPNOTSUPP; } -static inline int pse_ethtool_get_pw_limit(struct pse_control *psec, - struct netlink_ext_ack *extack) -{ - return -EOPNOTSUPP; -} - static inline bool pse_has_podl(struct pse_control *psec) { return false; diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index 730f77381d55..2503f7625d65 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,6 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; + const struct export_operations *eops; const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index f1fd3a8044e0..dd10c22299ab 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -84,11 +84,9 @@ enum psi_aggregators { struct psi_group_cpu { /* 1st cacheline updated by the scheduler */ - /* Aggregator needs to know of concurrent changes */ - seqcount_t seq ____cacheline_aligned_in_smp; - /* States of the tasks belonging to this group */ - unsigned int tasks[NR_PSI_TASK_COUNTS]; + unsigned int tasks[NR_PSI_TASK_COUNTS] + ____cacheline_aligned_in_smp; /* Aggregate pressure state derived from the tasks */ u32 state_mask; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 903ddfea8585..0f5f94137f6d 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -594,6 +594,7 @@ struct sev_data_snp_addr { * @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the * purpose of guest-assisted migration. * @rsvd: reserved + * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest * @gosvw: guest OS-visible workarounds, as defined by hypervisor */ struct sev_data_snp_launch_start { @@ -603,6 +604,7 @@ struct sev_data_snp_launch_start { u32 ma_en:1; /* In */ u32 imi_en:1; /* In */ u32 rsvd:30; + u32 desired_tsc_khz; /* In */ u8 gosvw[16]; /* In */ } __packed; @@ -815,6 +817,15 @@ struct sev_data_snp_commit { #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** + * sev_module_init - perform PSP SEV module initialization + * + * Returns: + * 0 if the PSP module is successfully initialized + * negative value if the PSP module initialization fails + */ +int sev_module_init(void); + +/** * sev_platform_init - perform SEV INIT command * * @args: struct sev_platform_init_args to pass in arguments @@ -945,6 +956,7 @@ int sev_do_cmd(int cmd, void *data, int *psp_ret); void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); +void sev_platform_shutdown(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -979,6 +991,8 @@ static inline void *snp_alloc_firmware_page(gfp_t mask) static inline void snp_free_firmware_page(void *addr) { } +static inline void sev_platform_shutdown(void) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h index 8dbd51ea8626..240bd3bff18d 100644 --- a/include/linux/ptdump.h +++ b/include/linux/ptdump.h @@ -11,10 +11,17 @@ struct ptdump_range { }; struct ptdump_state { - /* level is 0:PGD to 4:PTE, or -1 if unknown */ - void (*note_page)(struct ptdump_state *st, unsigned long addr, - int level, u64 val); - void (*effective_prot)(struct ptdump_state *st, int level, u64 val); + void (*note_page_pte)(struct ptdump_state *st, unsigned long addr, pte_t pte); + void (*note_page_pmd)(struct ptdump_state *st, unsigned long addr, pmd_t pmd); + void (*note_page_pud)(struct ptdump_state *st, unsigned long addr, pud_t pud); + void (*note_page_p4d)(struct ptdump_state *st, unsigned long addr, p4d_t p4d); + void (*note_page_pgd)(struct ptdump_state *st, unsigned long addr, pgd_t pgd); + void (*note_page_flush)(struct ptdump_state *st); + void (*effective_prot_pte)(struct ptdump_state *st, pte_t pte); + void (*effective_prot_pmd)(struct ptdump_state *st, pmd_t pmd); + void (*effective_prot_pud)(struct ptdump_state *st, pud_t pud); + void (*effective_prot_p4d)(struct ptdump_state *st, p4d_t p4d); + void (*effective_prot_pgd)(struct ptdump_state *st, pgd_t pgd); const struct ptdump_range *range; }; diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index c892d22ce0a7..eced7e9bf69a 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -68,6 +68,22 @@ struct ptp_system_timestamp { * @n_per_out: The number of programmable periodic signals. * @n_pins: The number of programmable pins. * @pps: Indicates whether the clock supports a PPS callback. + * + * @supported_perout_flags: The set of flags the driver supports for the + * PTP_PEROUT_REQUEST ioctl. The PTP core will + * reject a request with any flag not specified + * here. + * + * @supported_extts_flags: The set of flags the driver supports for the + * PTP_EXTTS_REQUEST ioctl. The PTP core will use + * this list to reject unsupported requests. + * PTP_ENABLE_FEATURE is assumed and does not need to + * be included. If PTP_STRICT_FLAGS is *not* set, + * then both PTP_RISING_EDGE and PTP_FALLING_EDGE + * will be assumed. Note that PTP_STRICT_FLAGS must + * be set if the drivers wants to honor + * PTP_EXTTS_REQUEST2 and any future flags. + * * @pin_config: Array of length 'n_pins'. If the number of * programmable pins is nonzero, then drivers must * allocate and initialize this array. @@ -174,6 +190,8 @@ struct ptp_clock_info { int n_per_out; int n_pins; int pps; + unsigned int supported_perout_flags; + unsigned int supported_extts_flags; struct ptp_pin_desc *pin_config; int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm); int (*adjphase)(struct ptp_clock_info *ptp, s32 phase); @@ -307,7 +325,7 @@ static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm) * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. * - * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * Returns: a valid pointer on success or PTR_ERR on failure. If PHC * support is missing at the configuration level, this function * returns NULL, and drivers are expected to gracefully handle that * case separately. @@ -445,7 +463,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); * @hwtstamp: timestamp * @vclock_index: phc index of ptp vclock. * - * Returns converted timestamp, or 0 on error. + * Returns: converted timestamp, or 0 on error. */ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index); #else diff --git a/include/linux/pwm.h b/include/linux/pwm.h index a2df509056ac..63a17d2b4ec8 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -218,6 +218,8 @@ static inline void pwm_init_state(const struct pwm_device *pwm, * * pwm_get_state(pwm, &state); * duty = pwm_get_relative_duty_cycle(&state, 100); + * + * Returns: rounded relative duty cycle multiplied by @scale */ static inline unsigned int pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) @@ -244,8 +246,8 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) * pwm_set_relative_duty_cycle(&state, 50, 100); * pwm_apply_might_sleep(pwm, &state); * - * This functions returns -EINVAL if @duty_cycle and/or @scale are - * inconsistent (@scale == 0 or @duty_cycle > @scale). + * Returns: 0 on success or ``-EINVAL`` if @duty_cycle and/or @scale are + * inconsistent (@scale == 0 or @duty_cycle > @scale) */ static inline int pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, @@ -351,7 +353,7 @@ struct pwm_chip { * pwmchip_supports_waveform() - checks if the given chip supports waveform callbacks * @chip: The pwm_chip to test * - * Returns true iff the pwm chip support the waveform functions like + * Returns: true iff the pwm chip support the waveform functions like * pwm_set_waveform_might_sleep() and pwm_round_waveform_might_sleep() */ static inline bool pwmchip_supports_waveform(struct pwm_chip *chip) @@ -369,7 +371,7 @@ static inline struct device *pwmchip_parent(const struct pwm_chip *chip) return chip->dev.parent; } -static inline void *pwmchip_get_drvdata(struct pwm_chip *chip) +static inline void *pwmchip_get_drvdata(const struct pwm_chip *chip) { return dev_get_drvdata(&chip->dev); } @@ -379,7 +381,7 @@ static inline void pwmchip_set_drvdata(struct pwm_chip *chip, void *data) dev_set_drvdata(&chip->dev, data); } -#if IS_ENABLED(CONFIG_PWM) +#if IS_REACHABLE(CONFIG_PWM) /* PWM consumer APIs */ int pwm_round_waveform_might_sleep(struct pwm_device *pwm, struct pwm_waveform *wf); @@ -661,7 +663,7 @@ struct pwm_lookup { PWM_LOOKUP_WITH_MODULE(_provider, _index, _dev_id, _con_id, _period, \ _polarity, NULL) -#if IS_ENABLED(CONFIG_PWM) +#if IS_REACHABLE(CONFIG_PWM) void pwm_add_table(struct pwm_lookup *table, size_t num); void pwm_remove_table(struct pwm_lookup *table, size_t num); #else diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 5b67cd03276e..aa29ac53b833 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -267,7 +267,7 @@ struct qed_ll2_ops { int qed_ll2_alloc_if(struct qed_dev *); void qed_ll2_dealloc_if(struct qed_dev *); #else -static const struct qed_ll2_ops qed_ll2_ops_pass = { +static __maybe_unused const struct qed_ll2_ops qed_ll2_ops_pass = { .start = NULL, .stop = NULL, .start_xmit = NULL, diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 98030accf641..72ff44cca864 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -108,6 +108,10 @@ extern const struct raid6_calls raid6_vpermxor4; extern const struct raid6_calls raid6_vpermxor8; extern const struct raid6_calls raid6_lsx; extern const struct raid6_calls raid6_lasx; +extern const struct raid6_calls raid6_rvvx1; +extern const struct raid6_calls raid6_rvvx2; +extern const struct raid6_calls raid6_rvvx4; +extern const struct raid6_calls raid6_rvvx8; struct raid6_recov_calls { void (*data2)(int, size_t, int, int, void **); @@ -125,6 +129,7 @@ extern const struct raid6_recov_calls raid6_recov_s390xc; extern const struct raid6_recov_calls raid6_recov_neon; extern const struct raid6_recov_calls raid6_recov_lsx; extern const struct raid6_recov_calls raid6_recov_lasx; +extern const struct raid6_recov_calls raid6_recov_rvv; extern const struct raid6_calls raid6_neonx1; extern const struct raid6_calls raid6_neonx2; diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index b17e0cd0a30c..7aaad158ee37 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -22,16 +22,43 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs) DEFAULT_RATELIMIT_BURST); } +static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs) +{ + atomic_inc(&rs->missed); +} + +static inline int ratelimit_state_get_miss(struct ratelimit_state *rs) +{ + return atomic_read(&rs->missed); +} + +static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs) +{ + return atomic_xchg_relaxed(&rs->missed, 0); +} + +static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rs->lock, flags); + rs->interval = interval_init; + rs->flags &= ~RATELIMIT_INITIALIZED; + atomic_set(&rs->rs_n_left, rs->burst); + ratelimit_state_reset_miss(rs); + raw_spin_unlock_irqrestore(&rs->lock, flags); +} + static inline void ratelimit_state_exit(struct ratelimit_state *rs) { + int m; + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) return; - if (rs->missed) { - pr_warn("%s: %d output lines suppressed due to ratelimiting\n", - current->comm, rs->missed); - rs->missed = 0; - } + m = ratelimit_state_reset_miss(rs); + if (m) + pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m); } static inline void diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index 765232ce0b5e..b19c4354540a 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -11,14 +11,15 @@ /* issue num suppressed message on exit */ #define RATELIMIT_MSG_ON_RELEASE BIT(0) +#define RATELIMIT_INITIALIZED BIT(1) struct ratelimit_state { raw_spinlock_t lock; /* protect the state */ int interval; int burst; - int printed; - int missed; + atomic_t rs_n_left; + atomic_t missed; unsigned int flags; unsigned long begin; }; diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 7c173aa64e1e..8d2ba3749866 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -211,6 +211,43 @@ rb_add(struct rb_node *node, struct rb_root *tree, } /** + * rb_find_add_cached() - find equivalent @node in @tree, or add @node + * @node: node to look-for / insert + * @tree: tree to search / modify + * @cmp: operator defining the node order + * + * Returns the rb_node matching @node, or NULL when no match is found and @node + * is inserted. + */ +static __always_inline struct rb_node * +rb_find_add_cached(struct rb_node *node, struct rb_root_cached *tree, + int (*cmp)(const struct rb_node *new, const struct rb_node *exist)) +{ + bool leftmost = true; + struct rb_node **link = &tree->rb_root.rb_node; + struct rb_node *parent = NULL; + int c; + + while (*link) { + parent = *link; + c = cmp(node, parent); + + if (c < 0) { + link = &parent->rb_left; + } else if (c > 0) { + link = &parent->rb_right; + leftmost = false; + } else { + return parent; + } + } + + rb_link_node(node, parent, link); + rb_insert_color_cached(node, tree, leftmost); + return NULL; +} + +/** * rb_find_add() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert * @tree: tree to search / modify diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 14dfa6008467..1b11926ddd47 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -30,6 +30,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) * way, we must not access it directly */ #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) +/* + * Return the ->prev pointer of a list_head in an rcu safe way. Don't + * access it directly. + * + * Any list traversed with list_bidir_prev_rcu() must never use + * list_del_rcu(). Doing so will poison the ->prev pointer that + * list_bidir_prev_rcu() relies on, which will result in segfaults. + * To prevent these segfaults, use list_bidir_del_rcu() instead + * of list_del_rcu(). + */ +#define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) /** * list_tail_rcu - returns the prev pointer of the head of the list @@ -159,6 +170,39 @@ static inline void list_del_rcu(struct list_head *entry) } /** + * list_bidir_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * In contrast to list_del_rcu() doesn't poison the prev pointer thus + * allowing backwards traversal via list_bidir_prev_rcu(). + * + * Note: list_empty() on entry does not return true after this because + * the entry is in a special undefined state that permits RCU-based + * lockfree reverse traversal. In particular this means that we can not + * poison the forward and backwards pointers that may still be used for + * walking the list. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another list-mutation + * primitive, such as list_bidir_del_rcu() or list_add_rcu(), running on + * this same list. However, it is perfectly legal to run concurrently + * with the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that list_del_rcu() and list_bidir_del_rcu() must not be used on + * the same list. + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_bidir_del_rcu(struct list_head *entry) +{ + __list_del_entry(entry); +} + +/** * hlist_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. * diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 48e5c03df1dd..120536f4c6eb 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -95,9 +95,9 @@ static inline void __rcu_read_lock(void) static inline void __rcu_read_unlock(void) { - preempt_enable(); if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) rcu_read_unlock_strict(); + preempt_enable(); } static inline int rcu_preempt_depth(void) @@ -121,12 +121,6 @@ void rcu_init(void); extern int rcu_scheduler_active; void rcu_sched_clock_irq(int user); -#ifdef CONFIG_TASKS_RCU_GENERIC -void rcu_init_tasks_generic(void); -#else -static inline void rcu_init_tasks_generic(void) { } -#endif - #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); @@ -138,7 +132,7 @@ static inline void rcu_sysrq_end(void) { } #if defined(CONFIG_NO_HZ_FULL) && (!defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK)) void rcu_irq_work_resched(void); #else -static inline void rcu_irq_work_resched(void) { } +static __always_inline void rcu_irq_work_resched(void) { } #endif #ifdef CONFIG_RCU_NOCB_CPU @@ -806,11 +800,9 @@ do { \ * sections, invocation of the corresponding RCU callback is deferred * until after the all the other CPUs exit their critical sections. * - * In v5.0 and later kernels, synchronize_rcu() and call_rcu() also - * wait for regions of code with preemption disabled, including regions of - * code with interrupts or softirqs disabled. In pre-v5.0 kernels, which - * define synchronize_sched(), only code enclosed within rcu_read_lock() - * and rcu_read_unlock() are guaranteed to be waited for. + * Both synchronize_rcu() and call_rcu() also wait for regions of code + * with preemption disabled, including regions of code with interrupts or + * softirqs disabled. * * Note, however, that RCU callbacks are permitted to run concurrently * with new RCU read-side critical sections. One way that this can happen @@ -865,11 +857,10 @@ static __always_inline void rcu_read_lock(void) * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * In almost all situations, rcu_read_unlock() is immune from deadlock. - * In recent kernels that have consolidated synchronize_sched() and - * synchronize_rcu_bh() into synchronize_rcu(), this deadlock immunity - * also extends to the scheduler's runqueue and priority-inheritance - * spinlocks, courtesy of the quiescent-state deferral that is carried - * out when rcu_read_unlock() is invoked with interrupts disabled. + * This deadlock immunity also extends to the scheduler's runqueue + * and priority-inheritance spinlocks, courtesy of the quiescent-state + * deferral that is carried out when rcu_read_unlock() is invoked with + * interrupts disabled. * * See rcu_read_lock() for more information. */ @@ -1025,12 +1016,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_POINTER_INITIALIZER(p, v) \ .p = RCU_INITIALIZER(v) -/* - * Does the specified offset indicate that the corresponding rcu_head - * structure can be handled by kvfree_rcu()? - */ -#define __is_kvfree_rcu_offset(offset) ((offset) < 4096) - /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree for double-argument invocations. @@ -1041,11 +1026,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * when they are used in a kernel module, that module must invoke the * high-latency rcu_barrier() function at module-unload time. * - * The kfree_rcu() function handles this issue. Rather than encoding a - * function address in the embedded rcu_head structure, kfree_rcu() instead - * encodes the offset of the rcu_head structure within the base structure. - * Because the functions are not allowed in the low-order 4096 bytes of - * kernel virtual memory, offsets up to 4095 bytes can be accommodated. + * The kfree_rcu() function handles this issue. In order to have a universal + * callback function handling different offsets of rcu_head, the callback needs + * to determine the starting address of the freed object, which can be a large + * kmalloc or vmalloc allocation. To allow simply aligning the pointer down to + * page boundary for those, only offsets up to 4095 bytes can be accommodated. * If the offset is larger than 4095 bytes, a compile-time error will * be generated in kvfree_rcu_arg_2(). If this error is triggered, you can * either fall back to use of call_rcu() or rearrange the structure to @@ -1082,14 +1067,23 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) #define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr) +/* + * In mm/slab_common.c, no suitable header to include here. + */ +void kvfree_call_rcu(struct rcu_head *head, void *ptr); + +/* + * The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the + * comment of kfree_rcu() for details. + */ #define kvfree_rcu_arg_2(ptr, rhf) \ do { \ typeof (ptr) ___p = (ptr); \ \ - if (___p) { \ - BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \ - kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ - } \ + if (___p) { \ + BUILD_BUG_ON(offsetof(typeof(*(ptr)), rhf) >= 4096); \ + kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \ + } \ } while (0) #define kvfree_rcu_arg_1(ptr) \ diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 303ab9bee155..4c92d4291cce 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -16,6 +16,9 @@ struct rcu_synchronize { struct rcu_head head; struct completion completion; + + /* This is for debugging. */ + struct rcu_gp_oldstate oldstate; }; void wakeme_after_rcu(struct rcu_head *head); @@ -65,4 +68,15 @@ static inline void cond_resched_rcu(void) #endif } +// Has the current task blocked within its current RCU read-side +// critical section? +static inline bool has_rcu_reader_blocked(void) +{ +#ifdef CONFIG_PREEMPT_RCU + return !list_empty(¤t->rcu_node_entry); +#else + return false; +#endif +} + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h index 6322d8c1c6b4..2fb2af6d9824 100644 --- a/include/linux/rcuref.h +++ b/include/linux/rcuref.h @@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) * rcuref_read - Read the number of held reference counts of a rcuref * @ref: Pointer to the reference count * - * Return: The number of held references (0 ... N) + * Return: The number of held references (0 ... N). The value 0 does not + * indicate that it is safe to schedule the object, protected by this reference + * counter, for deconstruction. + * If you want to know if the reference counter has been marked DEAD (as + * signaled by rcuref_put()) please use rcuread_is_dead(). */ static inline unsigned int rcuref_read(rcuref_t *ref) { @@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref) return c >= RCUREF_RELEASED ? 0 : c + 1; } +/** + * rcuref_is_dead - Check if the rcuref has been already marked dead + * @ref: Pointer to the reference count + * + * Return: True if the object has been marked DEAD. This signals that a previous + * invocation of rcuref_put() returned true on this reference counter meaning + * the protected object can safely be scheduled for deconstruction. + * Otherwise, returns false. + */ +static inline bool rcuref_is_dead(rcuref_t *ref) +{ + unsigned int c = atomic_read(&ref->refcnt); + + return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF); +} + extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index fe42315f667f..f519cd680228 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -90,41 +90,6 @@ static inline void synchronize_rcu_expedited(void) synchronize_rcu(); } -/* - * Add one more declaration of kvfree() here. It is - * not so straight forward to just include <linux/mm.h> - * where it is defined due to getting many compile - * errors caused by that include. - */ -extern void kvfree(const void *addr); - -static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr) -{ - if (head) { - call_rcu(head, (rcu_callback_t) ((void *) head - ptr)); - return; - } - - // kvfree_rcu(one_arg) call. - might_sleep(); - synchronize_rcu(); - kvfree(ptr); -} - -static inline void kvfree_rcu_barrier(void) -{ - rcu_barrier(); -} - -#ifdef CONFIG_KASAN_GENERIC -void kvfree_call_rcu(struct rcu_head *head, void *ptr); -#else -static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr) -{ - __kvfree_call_rcu(head, ptr); -} -#endif - void rcu_qs(void); static inline void rcu_softirq_qs(void) @@ -164,7 +129,6 @@ static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } static inline void rcu_momentary_eqs(void) { } -static inline void kfree_rcu_scheduler_running(void) { } /* Avoid RCU read-side critical sections leaking across. */ static inline void rcu_all_qs(void) { barrier(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 27d86d912781..9d2d7bd251d4 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,12 +34,9 @@ static inline void rcu_virt_note_context_switch(void) } void synchronize_rcu_expedited(void); -void kvfree_call_rcu(struct rcu_head *head, void *ptr); -void kvfree_rcu_barrier(void); void rcu_barrier(void); void rcu_momentary_eqs(void); -void kfree_rcu_scheduler_running(void); struct rcu_gp_oldstate { unsigned long rgos_norm; @@ -103,7 +100,7 @@ extern int rcu_scheduler_active; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); -#ifndef CONFIG_PREEMPTION +#ifndef CONFIG_PREEMPT_RCU void rcu_all_qs(void); #endif diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h index 27343424225c..9ad134a04b41 100644 --- a/include/linux/rcuwait.h +++ b/include/linux/rcuwait.h @@ -4,18 +4,7 @@ #include <linux/rcupdate.h> #include <linux/sched/signal.h> - -/* - * rcuwait provides a way of blocking and waking up a single - * task in an rcu-safe manner. - * - * The only time @task is non-nil is when a user is blocked (or - * checking if it needs to) on a condition, and reset as soon as we - * know that the condition has succeeded and are awoken. - */ -struct rcuwait { - struct task_struct __rcu *task; -}; +#include <linux/types.h> #define __RCUWAIT_INITIALIZER(name) \ { .task = NULL, } diff --git a/include/linux/reboot.h b/include/linux/reboot.h index abcdde4df697..aa08c3bbbf59 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -177,16 +177,38 @@ void ctrl_alt_del(void); extern void orderly_poweroff(bool force); extern void orderly_reboot(void); -void __hw_protection_shutdown(const char *reason, int ms_until_forced, bool shutdown); -static inline void hw_protection_reboot(const char *reason, int ms_until_forced) -{ - __hw_protection_shutdown(reason, ms_until_forced, false); -} +/** + * enum hw_protection_action - Hardware protection action + * + * @HWPROT_ACT_DEFAULT: + * The default action should be taken. This is HWPROT_ACT_SHUTDOWN + * by default, but can be overridden. + * @HWPROT_ACT_SHUTDOWN: + * The system should be shut down (powered off) for HW protection. + * @HWPROT_ACT_REBOOT: + * The system should be rebooted for HW protection. + */ +enum hw_protection_action { HWPROT_ACT_DEFAULT, HWPROT_ACT_SHUTDOWN, HWPROT_ACT_REBOOT }; -static inline void hw_protection_shutdown(const char *reason, int ms_until_forced) +void __hw_protection_trigger(const char *reason, int ms_until_forced, + enum hw_protection_action action); + +/** + * hw_protection_trigger - Trigger default emergency system hardware protection action + * + * @reason: Reason of emergency shutdown or reboot to be printed. + * @ms_until_forced: Time to wait for orderly shutdown or reboot before + * triggering it. Negative value disables the forced + * shutdown or reboot. + * + * Initiate an emergency system shutdown or reboot in order to protect + * hardware from further damage. The exact action taken is controllable at + * runtime and defaults to shutdown. + */ +static inline void hw_protection_trigger(const char *reason, int ms_until_forced) { - __hw_protection_shutdown(reason, ms_until_forced, true); + __hw_protection_trigger(reason, ms_until_forced, HWPROT_ACT_DEFAULT); } /* diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 35f039ecb272..80dc023ac2bf 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -87,6 +87,15 @@ * The decrements dec_and_test() and sub_and_test() also provide acquire * ordering on success. * + * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() provide + * acquire and release ordering for cases when the memory occupied by the + * object might be reused to store another object. This is important for the + * cases where secondary validation is required to detect such reuse, e.g. + * SLAB_TYPESAFE_BY_RCU. The secondary validation checks have to happen after + * the refcount is taken, hence acquire order is necessary. Similarly, when the + * object is initialized, all stores to its attributes should be visible before + * the refcount is set, otherwise a stale attribute value might be used by + * another task which succeeds in taking a refcount to the new object. */ #ifndef _LINUX_REFCOUNT_H @@ -126,6 +135,31 @@ static inline void refcount_set(refcount_t *r, int n) } /** + * refcount_set_release - set a refcount's value with release ordering + * @r: the refcount + * @n: value to which the refcount will be set + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides release memory ordering which will order previous memory operations + * against this store. This ensures all updates to this object are visible + * once the refcount is set and stale values from the object previously + * occupying this memory are overwritten with new ones. + * + * This function should be called only after new object is fully initialized. + * After this call the object should be considered visible to other tasks even + * if it was not yet added into an object collection normally used to discover + * it. This is because other tasks might have discovered the object previously + * occupying the same memory and after memory reuse they can succeed in taking + * refcount to the new object and start using it. + */ +static inline void refcount_set_release(refcount_t *r, int n) +{ + atomic_set_release(&r->refs, n); +} + +/** * refcount_read - get a refcount's value * @r: the refcount * @@ -178,6 +212,71 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r) return __refcount_add_not_zero(i, r, NULL); } +static inline __must_check __signed_wrap +bool __refcount_add_not_zero_limited_acquire(int i, refcount_t *r, int *oldp, + int limit) +{ + int old = refcount_read(r); + + do { + if (!old) + break; + + if (i > limit - old) { + if (oldp) + *oldp = old; + return false; + } + } while (!atomic_try_cmpxchg_acquire(&r->refs, &old, old + i)); + + if (oldp) + *oldp = old; + + if (unlikely(old < 0 || old + i < 0)) + refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF); + + return old; +} + +static inline __must_check bool +__refcount_inc_not_zero_limited_acquire(refcount_t *r, int *oldp, int limit) +{ + return __refcount_add_not_zero_limited_acquire(1, r, oldp, limit); +} + +static inline __must_check __signed_wrap +bool __refcount_add_not_zero_acquire(int i, refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero_limited_acquire(i, r, oldp, INT_MAX); +} + +/** + * refcount_add_not_zero_acquire - add a value to a refcount with acquire ordering unless it is 0 + * + * @i: the value to add to the refcount + * @r: the refcount + * + * Will saturate at REFCOUNT_SATURATED and WARN. + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides acquire memory ordering on success, it is assumed the caller has + * guaranteed the object memory to be stable (RCU, etc.). It does provide a + * control dependency and thereby orders future stores. See the comment on top. + * + * Use of this function is not recommended for the normal reference counting + * use case in which references are taken and released one at a time. In these + * cases, refcount_inc_not_zero_acquire() should instead be used to increment a + * reference count. + * + * Return: false if the passed refcount is 0, true otherwise + */ +static inline __must_check bool refcount_add_not_zero_acquire(int i, refcount_t *r) +{ + return __refcount_add_not_zero_acquire(i, r, NULL); +} + static inline __signed_wrap void __refcount_add(int i, refcount_t *r, int *oldp) { @@ -236,6 +335,32 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r) return __refcount_inc_not_zero(r, NULL); } +static inline __must_check bool __refcount_inc_not_zero_acquire(refcount_t *r, int *oldp) +{ + return __refcount_add_not_zero_acquire(1, r, oldp); +} + +/** + * refcount_inc_not_zero_acquire - increment a refcount with acquire ordering unless it is 0 + * @r: the refcount to increment + * + * Similar to refcount_inc_not_zero(), but provides acquire memory ordering on + * success. + * + * This function should be used when memory occupied by the object might be + * reused to store another object -- consider SLAB_TYPESAFE_BY_RCU. + * + * Provides acquire memory ordering on success, it is assumed the caller has + * guaranteed the object memory to be stable (RCU, etc.). It does provide a + * control dependency and thereby orders future stores. See the comment on top. + * + * Return: true if the increment was successful, false otherwise + */ +static inline __must_check bool refcount_inc_not_zero_acquire(refcount_t *r) +{ + return __refcount_inc_not_zero_acquire(r, NULL); +} + static inline void __refcount_inc(refcount_t *r, int *oldp) { __refcount_add(1, r, oldp); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index fd41baccbf3e..02b83f5499b8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -506,6 +506,32 @@ struct regmap_range_cfg { unsigned int window_len; }; +/** + * struct regmap_sdw_mbq_cfg - Configuration for Multi-Byte Quantities + * + * @mbq_size: Callback returning the actual size of the given register. + * @deferrable: Callback returning true if the hardware can defer + * transactions to the given register. Deferral should + * only be used by SDCA parts and typically which controls + * are deferrable will be specified in either as a hard + * coded list or from the DisCo tables in the platform + * firmware. + * + * @timeout_us: The time in microseconds after which waiting for a deferred + * transaction should time out. + * @retry_us: The time in microseconds between polls of the function busy + * status whilst waiting for an opportunity to retry a deferred + * transaction. + * + * Provides additional configuration required for SoundWire MBQ register maps. + */ +struct regmap_sdw_mbq_cfg { + int (*mbq_size)(struct device *dev, unsigned int reg); + bool (*deferrable)(struct device *dev, unsigned int reg); + unsigned long timeout_us; + unsigned long retry_us; +}; + struct regmap_async; typedef int (*regmap_hw_write)(void *context, const void *data, @@ -652,6 +678,7 @@ struct regmap *__regmap_init_sdw(struct sdw_slave *sdw, const char *lock_name); struct regmap *__regmap_init_sdw_mbq(struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name); struct regmap *__regmap_init_spi_avmm(struct spi_device *spi, @@ -713,6 +740,7 @@ struct regmap *__devm_regmap_init_sdw(struct sdw_slave *sdw, const char *lock_name); struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw, const struct regmap_config *config, + const struct regmap_sdw_mbq_cfg *mbq_config, struct lock_class_key *lock_key, const char *lock_name); struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, @@ -942,7 +970,22 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); */ #define regmap_init_sdw_mbq(sdw, config) \ __regmap_lockdep_wrapper(__regmap_init_sdw_mbq, #config, \ - sdw, config) + sdw, config, NULL) + +/** + * regmap_init_sdw_mbq_cfg() - Initialise MBQ SDW register map with config + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * @mbq_config: Properties for the MBQ registers + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define regmap_init_sdw_mbq_cfg(sdw, config, mbq_config) \ + __regmap_lockdep_wrapper(__regmap_init_sdw_mbq, #config, \ + sdw, config, mbq_config) /** * regmap_init_spi_avmm() - Initialize register map for Intel SPI Slave @@ -1155,7 +1198,22 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); */ #define devm_regmap_init_sdw_mbq(sdw, config) \ __regmap_lockdep_wrapper(__devm_regmap_init_sdw_mbq, #config, \ - sdw, config) + sdw, config, NULL) + +/** + * devm_regmap_init_sdw_mbq_cfg() - Initialise managed MBQ SDW register map with config + * + * @sdw: Device that will be interacted with + * @config: Configuration for register map + * @mbq_config: Properties for the MBQ registers + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sdw_mbq_cfg(sdw, config, mbq_config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sdw_mbq, \ + #config, sdw, config, mbq_config) /** * devm_regmap_init_slimbus() - Initialise managed register map @@ -1244,7 +1302,7 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); -int regmap_multi_reg_read(struct regmap *map, unsigned int *reg, void *val, +int regmap_multi_reg_read(struct regmap *map, const unsigned int *reg, void *val, size_t val_count); int regmap_update_bits_base(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, @@ -1294,6 +1352,7 @@ bool regmap_can_raw_write(struct regmap *map); size_t regmap_get_raw_read_max(struct regmap *map); size_t regmap_get_raw_write_max(struct regmap *map); +void regcache_sort_defaults(struct reg_default *defaults, unsigned int ndefaults); int regcache_sync(struct regmap *map); int regcache_sync_region(struct regmap *map, unsigned int min, unsigned int max); @@ -1582,6 +1641,8 @@ struct regmap_irq_chip_data; * @ack_invert: Inverted ack register: cleared bits for ack. * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @status_invert: Inverted status register: cleared bits are active interrupts. + * @status_is_level: Status register is actuall signal level: Xor status + * register with previous value to get active interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge @@ -1645,6 +1706,7 @@ struct regmap_irq_chip { unsigned int ack_invert:1; unsigned int clear_ack:1; unsigned int status_invert:1; + unsigned int status_is_level:1; unsigned int wake_invert:1; unsigned int type_in_mask:1; unsigned int clear_on_unmask:1; @@ -1985,6 +2047,12 @@ static inline bool regmap_might_sleep(struct regmap *map) return true; } +static inline void regcache_sort_defaults(struct reg_default *defaults, + unsigned int ndefaults) +{ + WARN_ONCE(1, "regmap API is disabled"); +} + static inline int regcache_sync(struct regmap *map) { WARN_ONCE(1, "regmap API is disabled"); diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index bcba3935c6f9..56fe2693d9b2 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -233,6 +233,11 @@ int regulator_sync_voltage(struct regulator *regulator); int regulator_set_current_limit(struct regulator *regulator, int min_uA, int max_uA); int regulator_get_current_limit(struct regulator *regulator); +int regulator_get_unclaimed_power_budget(struct regulator *regulator); +int regulator_request_power_budget(struct regulator *regulator, + unsigned int pw_req); +void regulator_free_power_budget(struct regulator *regulator, + unsigned int pw); int regulator_set_mode(struct regulator *regulator, unsigned int mode); unsigned int regulator_get_mode(struct regulator *regulator); @@ -526,6 +531,22 @@ static inline int regulator_get_current_limit(struct regulator *regulator) return 0; } +static inline int regulator_get_unclaimed_power_budget(struct regulator *regulator) +{ + return INT_MAX; +} + +static inline int regulator_request_power_budget(struct regulator *regulator, + unsigned int pw_req) +{ + return -EOPNOTSUPP; +} + +static inline void regulator_free_power_budget(struct regulator *regulator, + unsigned int pw) +{ +} + static inline int regulator_set_mode(struct regulator *regulator, unsigned int mode) { @@ -656,6 +677,12 @@ regulator_is_equal(struct regulator *reg1, struct regulator *reg2) #endif #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_REGULATOR) +struct regulator *__must_check of_regulator_get(struct device *dev, + struct device_node *node, + const char *id); +struct regulator *__must_check devm_of_regulator_get(struct device *dev, + struct device_node *node, + const char *id); struct regulator *__must_check of_regulator_get_optional(struct device *dev, struct device_node *node, const char *id); diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 5b66caf1695d..4a216fdba354 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -656,6 +656,8 @@ struct regulator_dev { int cached_err; bool use_cached_err; spinlock_t err_lock; + + int pw_requested_mW; }; /* diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index b3db09a7429b..1fc440c5c4c7 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -113,6 +113,7 @@ struct notification_limit { * @min_uA: Smallest current consumers may set. * @max_uA: Largest current consumers may set. * @ilim_uA: Maximum input current. + * @pw_budget_mW: Power budget for the regulator in mW. * @system_load: Load that isn't captured by any consumer requests. * * @over_curr_limits: Limits for acting on over current. @@ -185,6 +186,7 @@ struct regulation_constraints { int max_uA; int ilim_uA; + int pw_budget_mW; int system_load; /* used for coupled regulators */ diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h index 8712c091abf0..61dcd8e00a2f 100644 --- a/include/linux/regulator/max8952.h +++ b/include/linux/regulator/max8952.h @@ -2,7 +2,7 @@ /* * max8952.h - Voltage regulation for the Maxim 8952 * - * Copyright (C) 2010 Samsung Electrnoics + * Copyright (C) 2010 Samsung Electronics * MyungJoo Ham <myungjoo.ham@samsung.com> */ diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index 243633c8dceb..85b4fecc10d8 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -10,6 +10,7 @@ enum pca9450_chip_type { PCA9450_TYPE_PCA9450A = 0, PCA9450_TYPE_PCA9450BC, PCA9450_TYPE_PCA9451A, + PCA9450_TYPE_PCA9452, PCA9450_TYPE_AMOUNT, }; @@ -34,6 +35,8 @@ enum { PCA9450_DVS_LEVEL_MAX, }; +#define PCA9450_RESTART_HANDLER_PRIORITY 130 + #define PCA9450_BUCK1_VOLTAGE_NUM 0x80 #define PCA9450_BUCK2_VOLTAGE_NUM 0x80 #define PCA9450_BUCK3_VOLTAGE_NUM 0x80 @@ -234,4 +237,7 @@ enum { #define I2C_LT_ON_RUN 0x02 #define I2C_LT_FORCE_ENABLE 0x03 +/* PCA9450_REG_SW_RST command */ +#define SW_RST_COMMAND 0x14 + #endif /* __LINUX_REG_PCA9450_H__ */ diff --git a/include/linux/relay.h b/include/linux/relay.h index 72b876dd5cb8..b3224111d074 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -159,9 +159,6 @@ struct rchan *relay_open(const char *base_filename, size_t n_subbufs, const struct rchan_callbacks *cb, void *private_data); -extern int relay_late_setup_files(struct rchan *chan, - const char *base_filename, - struct dentry *parent); extern void relay_close(struct rchan *chan); extern void relay_flush(struct rchan *chan); extern void relay_subbufs_consumed(struct rchan *chan, diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d94abba1c716..6fb4894b8cfd 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -6,6 +6,11 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/pid.h> +#include <linux/resctrl_types.h> + +#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL +#include <asm/resctrl.h> +#endif /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 @@ -25,6 +30,34 @@ int proc_resctrl_show(struct seq_file *m, /* max value for struct rdt_domain's mbps_val */ #define MBA_MAX_MBPS U32_MAX +/* Walk all possible resources, with variants for only controls or monitors. */ +#define for_each_rdt_resource(_r) \ + for ((_r) = resctrl_arch_get_resource(0); \ + (_r) && (_r)->rid < RDT_NUM_RESOURCES; \ + (_r) = resctrl_arch_get_resource((_r)->rid + 1)) + +#define for_each_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable || (r)->mon_capable) + +#define for_each_alloc_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->alloc_capable) + +#define for_each_mon_capable_rdt_resource(r) \ + for_each_rdt_resource((r)) \ + if ((r)->mon_capable) + +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. @@ -40,13 +73,42 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) /* - * Event IDs, the values match those used to program IA32_QM_EVTSEL before - * reading IA32_QM_CTR on RDT systems. + * struct pseudo_lock_region - pseudo-lock region information + * @s: Resctrl schema for the resource to which this + * pseudo-locked region belongs + * @closid: The closid that this pseudo-locked region uses + * @d: RDT domain to which this pseudo-locked region + * belongs + * @cbm: bitmask of the pseudo-locked region + * @lock_thread_wq: waitqueue used to wait on the pseudo-locking thread + * completion + * @thread_done: variable used by waitqueue to test if pseudo-locking + * thread completed + * @cpu: core associated with the cache on which the setup code + * will be run + * @line_size: size of the cache lines + * @size: size of pseudo-locked region in bytes + * @kmem: the kernel memory associated with pseudo-locked region + * @minor: minor number of character device associated with this + * region + * @debugfs_dir: pointer to this region's directory in the debugfs + * filesystem + * @pm_reqs: Power management QoS requests related to this region */ -enum resctrl_event_id { - QOS_L3_OCCUP_EVENT_ID = 0x01, - QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, - QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +struct pseudo_lock_region { + struct resctrl_schema *s; + u32 closid; + struct rdt_ctrl_domain *d; + u32 cbm; + wait_queue_head_t lock_thread_wq; + int thread_done; + int cpu; + unsigned int line_size; + unsigned int size; + void *kmem; + unsigned int minor; + struct dentry *debugfs_dir; + struct list_head pm_reqs; }; /** @@ -97,7 +159,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -108,7 +170,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned int ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; @@ -155,6 +217,7 @@ enum membw_throttle_mode { /** * struct resctrl_membw - Memory bandwidth allocation related data * @min_bw: Minimum memory bandwidth percentage user can request + * @max_bw: Maximum memory bandwidth value, used as the reset value * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale * @arch_needs_linear: True if we can't configure non-linear resources @@ -165,6 +228,7 @@ enum membw_throttle_mode { */ struct resctrl_membw { u32 min_bw; + u32 max_bw; u32 bw_gran; u32 delay_linear; bool arch_needs_linear; @@ -173,7 +237,6 @@ struct resctrl_membw { u32 *mb_map; }; -struct rdt_parse_data; struct resctrl_schema; enum resctrl_scope { @@ -183,6 +246,16 @@ enum resctrl_scope { }; /** + * enum resctrl_schema_fmt - The format user-space provides for a schema. + * @RESCTRL_SCHEMA_BITMAP: The schema is a bitmap in hex. + * @RESCTRL_SCHEMA_RANGE: The schema is a decimal number. + */ +enum resctrl_schema_fmt { + RESCTRL_SCHEMA_BITMAP, + RESCTRL_SCHEMA_RANGE, +}; + +/** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine @@ -195,12 +268,10 @@ enum resctrl_scope { * @ctrl_domains: RCU list of all control domains for this resource * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. - * @data_width: Character width of data when displaying - * @default_ctrl: Specifies default cache cbm or memory B/W percent. - * @format_str: Per resource format string to show domain value - * @parse_ctrlval: Per resource function pointer to parse control values + * @schema_fmt: Which format string and parser is used for this schema. * @evt_list: List of monitoring events - * @fflags: flags to choose base and info files + * @mbm_cfg_mask: Bandwidth sources that can be tracked when bandwidth + * monitoring events can be configured. * @cdp_capable: Is the CDP feature available on this resource */ struct rdt_resource { @@ -215,22 +286,25 @@ struct rdt_resource { struct list_head ctrl_domains; struct list_head mon_domains; char *name; - int data_width; - u32 default_ctrl; - const char *format_str; - int (*parse_ctrlval)(struct rdt_parse_data *data, - struct resctrl_schema *s, - struct rdt_ctrl_domain *d); + enum resctrl_schema_fmt schema_fmt; struct list_head evt_list; - unsigned long fflags; + unsigned int mbm_cfg_mask; bool cdp_capable; }; +/* + * Get the resource that exists at this level. If the level is not supported + * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES + * will return NULL. + */ +struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l); + /** * struct resctrl_schema - configuration abilities of a resource presented to * user-space * @list: Member of resctrl_schema_all. * @name: The name to use in the "schemata" file. + * @fmt_str: Format string to show domain value. * @conf_type: Whether this schema is specific to code/data. * @res: The resource structure exported by the architecture to describe * the hardware that is configured by this schema. @@ -241,16 +315,107 @@ struct rdt_resource { struct resctrl_schema { struct list_head list; char name[8]; + const char *fmt_str; enum resctrl_conf_type conf_type; struct rdt_resource *res; u32 num_closid; }; +struct resctrl_cpu_defaults { + u32 closid; + u32 rmid; +}; + +struct resctrl_mon_config_info { + struct rdt_resource *r; + struct rdt_mon_domain *d; + u32 evtid; + u32 mon_config; +}; + +/** + * resctrl_arch_sync_cpu_closid_rmid() - Refresh this CPU's CLOSID and RMID. + * Call via IPI. + * @info: If non-NULL, a pointer to a struct resctrl_cpu_defaults + * specifying the new CLOSID and RMID for tasks in the default + * resctrl ctrl and mon group when running on this CPU. If NULL, + * this CPU is not re-assigned to a different default group. + * + * Propagates reassignment of CPUs and/or tasks to different resctrl groups + * when requested by the resctrl core code. + * + * This function records the per-cpu defaults specified by @info (if any), + * and then reconfigures the CPU's hardware CLOSID and RMID for subsequent + * execution based on @current, in the same way as during a task switch. + */ +void resctrl_arch_sync_cpu_closid_rmid(void *info); + +/** + * resctrl_get_default_ctrl() - Return the default control value for this + * resource. + * @r: The resource whose default control type is queried. + */ +static inline u32 resctrl_get_default_ctrl(struct rdt_resource *r) +{ + switch (r->schema_fmt) { + case RESCTRL_SCHEMA_BITMAP: + return BIT_MASK(r->cache.cbm_len) - 1; + case RESCTRL_SCHEMA_RANGE: + return r->membw.max_bw; + } + + return WARN_ON_ONCE(1); +} + /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); +bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); + +/** + * resctrl_arch_mon_event_config_write() - Write the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and writes the + * event config_info->mon_config into hardware. + * + * Called via IPI to reach a CPU that is a member of the specified domain. + */ +void resctrl_arch_mon_event_config_write(void *config_info); + +/** + * resctrl_arch_mon_event_config_read() - Read the config for an event. + * @config_info: struct resctrl_mon_config_info describing the resource, domain + * and event. + * + * Reads resource, domain and eventid from @config_info and reads the + * hardware config value into config_info->mon_config. + * + * Called via IPI to reach a CPU that is a member of the specified domain. + */ +void resctrl_arch_mon_event_config_read(void *config_info); + +/* For use by arch code to remap resctrl's smaller CDP CLOSID range */ +static inline u32 resctrl_get_config_index(u32 closid, + enum resctrl_conf_type type) +{ + switch (type) { + default: + case CDP_NONE: + return closid; + case CDP_CODE: + return closid * 2 + 1; + case CDP_DATA: + return closid * 2; + } +} + +bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. @@ -314,6 +479,20 @@ static inline void resctrl_arch_rmid_read_context_check(void) } /** + * resctrl_find_domain() - Search for a domain id in a resource domain list. + * @h: The domain list to search. + * @id: The domain id to search for. + * @pos: A pointer to position in the list id should be inserted. + * + * Search the domain list to find the domain id. If the domain id is + * found, return the domain. NULL otherwise. If the domain id is not + * found (and NULL returned) then the first domain with id bigger than + * the input id can be returned to the caller via @pos. + */ +struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id, + struct list_head **pos); + +/** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid * and eventid. * @r: The domain's resource. @@ -340,7 +519,32 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, */ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); +/** + * resctrl_arch_reset_all_ctrls() - Reset the control for each CLOSID to its + * default. + * @r: The resctrl resource to reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); + extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; +int resctrl_init(void); +void resctrl_exit(void); + +#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK +u64 resctrl_arch_get_prefetch_disable_bits(void); +int resctrl_arch_pseudo_lock_fn(void *_plr); +int resctrl_arch_measure_cycles_lat_fn(void *_plr); +int resctrl_arch_measure_l2_residency(void *_plr); +int resctrl_arch_measure_l3_residency(void *_plr); +#else +static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; } +static inline int resctrl_arch_pseudo_lock_fn(void *_plr) { return 0; } +static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; } +static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; } +static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; } +#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ #endif /* _RESCTRL_H */ diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h new file mode 100644 index 000000000000..a25fb9c4070d --- /dev/null +++ b/include/linux/resctrl_types.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2025 Arm Ltd. + * Based on arch/x86/kernel/cpu/resctrl/internal.h + */ + +#ifndef __LINUX_RESCTRL_TYPES_H +#define __LINUX_RESCTRL_TYPES_H + +#define MAX_MBA_BW 100u +#define MBM_OVERFLOW_INTERVAL 1000 + +/* Reads to Local DRAM Memory */ +#define READS_TO_LOCAL_MEM BIT(0) + +/* Reads to Remote DRAM Memory */ +#define READS_TO_REMOTE_MEM BIT(1) + +/* Non-Temporal Writes to Local Memory */ +#define NON_TEMP_WRITE_TO_LOCAL_MEM BIT(2) + +/* Non-Temporal Writes to Remote Memory */ +#define NON_TEMP_WRITE_TO_REMOTE_MEM BIT(3) + +/* Reads to Local Memory the system identifies as "Slow Memory" */ +#define READS_TO_LOCAL_S_MEM BIT(4) + +/* Reads to Remote Memory the system identifies as "Slow Memory" */ +#define READS_TO_REMOTE_S_MEM BIT(5) + +/* Dirty Victims to All Types of Memory */ +#define DIRTY_VICTIMS_TO_ALL_MEM BIT(6) + +/* Max event bits supported */ +#define MAX_EVT_CONFIG_BITS GENMASK(6, 0) + +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, + + /* Must be the last */ + QOS_NUM_EVENTS, +}; + +#endif /* __LINUX_RESCTRL_TYPES_H */ diff --git a/include/linux/reset.h b/include/linux/reset.h index 2986ced69a02..840d75d172f6 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -1005,6 +1005,12 @@ devm_reset_control_array_get_exclusive(struct device *dev) } static inline struct reset_control * +devm_reset_control_array_get_exclusive_released(struct device *dev) +{ + return devm_reset_control_array_get(dev, RESET_CONTROL_EXCLUSIVE_RELEASED); +} + +static inline struct reset_control * devm_reset_control_array_get_shared(struct device *dev) { return devm_reset_control_array_get(dev, RESET_CONTROL_SHARED); diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 13f17676c5f4..7e50bbc94e47 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -26,7 +26,7 @@ struct restart_block { unsigned long arch_data; long (*fn)(struct restart_block *); union { - /* For futex_wait and futex_wait_requeue_pi */ + /* For futex_wait() */ struct { u32 __user *uaddr; u32 val; diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 997b34197385..6816e4c5f3f0 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -241,7 +241,7 @@ bool rfkill_soft_blocked(struct rfkill *rfkill); * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type * - * Returns enum rfkill_type that corresponds to the name. + * Returns: enum rfkill_type that corresponds to the name. */ enum rfkill_type rfkill_find_type(const char *name); diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 8463a128e2f4..6c85b28ea30b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1259,7 +1259,7 @@ static inline int rhashtable_replace_fast( static inline void rhltable_walk_enter(struct rhltable *hlt, struct rhashtable_iter *iter) { - return rhashtable_walk_enter(&hlt->ht, iter); + rhashtable_walk_enter(&hlt->ht, iter); } /** @@ -1275,12 +1275,12 @@ static inline void rhltable_free_and_destroy(struct rhltable *hlt, void *arg), void *arg) { - return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); + rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); } static inline void rhltable_destroy(struct rhltable *hlt) { - return rhltable_free_and_destroy(hlt, NULL, NULL); + rhltable_free_and_destroy(hlt, NULL, NULL); } #endif /* _LINUX_RHASHTABLE_H */ diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 17fbb7855295..bc90c3c7b5fd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -92,10 +92,10 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags, int order, unsigned long start, unsigned long range_size, + unsigned long scratch_size, struct lock_class_key *key); -bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, - long *data); +void *ring_buffer_meta_scratch(struct trace_buffer *buffer, unsigned int *size); /* * Because the ring buffer is generic, if other users of the ring buffer get @@ -113,11 +113,11 @@ bool ring_buffer_last_boot_delta(struct trace_buffer *buffer, long *text, * traced by ftrace, it can produce lockdep warnings. We need to keep each * ring buffer's lock class separate. */ -#define ring_buffer_alloc_range(size, flags, order, start, range_size) \ +#define ring_buffer_alloc_range(size, flags, order, start, range_size, s_size) \ ({ \ static struct lock_class_key __key; \ __ring_buffer_alloc_range((size), (flags), (order), (start), \ - (range_size), &__key); \ + (range_size), (s_size), &__key); \ }) typedef bool (*ring_buffer_cond_fn)(void *data); @@ -152,9 +152,7 @@ ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags); -void ring_buffer_read_prepare_sync(void); -void ring_buffer_read_start(struct ring_buffer_iter *iter); +ring_buffer_read_start(struct trace_buffer *buffer, int cpu, gfp_t flags); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * @@ -192,6 +190,7 @@ void ring_buffer_record_off(struct trace_buffer *buffer); void ring_buffer_record_on(struct trace_buffer *buffer); bool ring_buffer_record_is_on(struct trace_buffer *buffer); bool ring_buffer_record_is_set_on(struct trace_buffer *buffer); +bool ring_buffer_record_is_on_cpu(struct trace_buffer *buffer, int cpu); void ring_buffer_record_disable_cpu(struct trace_buffer *buffer, int cpu); void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu); diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index e49c32b0f394..dd8afe511242 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -391,13 +391,8 @@ struct rio_dev *rio_dev_get(struct rio_dev *); void rio_dev_put(struct rio_dev *); #ifdef CONFIG_RAPIDIO_DMA_ENGINE -extern struct dma_chan *rio_request_dma(struct rio_dev *rdev); extern struct dma_chan *rio_request_mport_dma(struct rio_mport *mport); extern void rio_release_dma(struct dma_chan *dchan); -extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg( - struct rio_dev *rdev, struct dma_chan *dchan, - struct rio_dma_data *data, - enum dma_transfer_direction direction, unsigned long flags); extern struct dma_async_tx_descriptor *rio_dma_prep_xfer( struct dma_chan *dchan, u16 destid, struct rio_dma_data *data, diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 683a04088f3f..c4f4903b1088 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -13,6 +13,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/memremap.h> +#include <linux/bit_spinlock.h> /* * The anon_vma heads a list of private "related" vmas, to scan if @@ -173,6 +174,214 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, struct anon_vma *folio_get_anon_vma(const struct folio *folio); +#ifdef CONFIG_MM_ID +static __always_inline void folio_lock_large_mapcount(struct folio *folio) +{ + bit_spin_lock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static __always_inline void folio_unlock_large_mapcount(struct folio *folio) +{ + __bit_spin_unlock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids); +} + +static inline unsigned int folio_mm_id(const struct folio *folio, int idx) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + return folio->_mm_id[idx] & MM_ID_MASK; +} + +static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id) +{ + VM_WARN_ON_ONCE(idx != 0 && idx != 1); + folio->_mm_id[idx] &= ~MM_ID_MASK; + folio->_mm_id[idx] |= id; +} + +static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio, + int diff, mm_id_t mm_id) +{ + VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio)); + VM_WARN_ON_ONCE(diff <= 0); + VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX); + + /* + * Make sure we can detect at least one complete PTE mapping of the + * folio in a single MM as "exclusively mapped". This is primarily + * a check on 32bit, where we currently reduce the size of the per-MM + * mapcount to a short. + */ + VM_WARN_ON_ONCE(diff > folio_large_nr_pages(folio)); + VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY && + folio->_mm_id_mapcount[0] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY && + folio->_mm_id_mapcount[0] < 0); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) == MM_ID_DUMMY && + folio->_mm_id_mapcount[1] != -1); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY && + folio->_mm_id_mapcount[1] < 0); + VM_WARN_ON_ONCE(!folio_mapped(folio) && + test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids)); +} + +static __always_inline void folio_set_large_mapcount(struct folio *folio, + int mapcount, struct vm_area_struct *vma) +{ + __folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id); + + VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY); + VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY); + + /* Note: mapcounts start at -1. */ + atomic_set(&folio->_large_mapcount, mapcount - 1); + folio->_mm_id_mapcount[0] = mapcount - 1; + folio_set_mm_id(folio, 0, vma->vm_mm->mm_id); +} + +static __always_inline int folio_add_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) + diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * If a folio is mapped more than once into an MM on 32bit, we + * can in theory overflow the per-MM mapcount (although only for + * fairly large folios), turning it negative. In that case, just + * free up the slot and mark the folio "mapped shared", otherwise + * we might be in trouble when unmapping pages later. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[0] < 0)) { + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] += diff; + if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[1] < 0)) { + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + } else if (folio_mm_id(folio, 0) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 0, mm_id); + folio->_mm_id_mapcount[0] = diff - 1; + /* We might have other mappings already. */ + if (new_mapcount_val != diff - 1) + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } else if (folio_mm_id(folio, 1) == MM_ID_DUMMY) { + folio_set_mm_id(folio, 1, mm_id); + folio->_mm_id_mapcount[1] = diff - 1; + /* Slot 0 certainly has mappings as well. */ + folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT; + } + folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; +} +#define folio_add_large_mapcount folio_add_return_large_mapcount + +static __always_inline int folio_sub_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + const mm_id_t mm_id = vma->vm_mm->mm_id; + int new_mapcount_val; + + folio_lock_large_mapcount(folio); + __folio_large_mapcount_sanity_checks(folio, diff, mm_id); + + new_mapcount_val = atomic_read(&folio->_large_mapcount) - diff; + atomic_set(&folio->_large_mapcount, new_mapcount_val); + + /* + * There are valid corner cases where we might underflow a per-MM + * mapcount (some mappings added when no slot was free, some mappings + * added once a slot was free), so we always set it to -1 once we go + * negative. + */ + if (folio_mm_id(folio, 0) == mm_id) { + folio->_mm_id_mapcount[0] -= diff; + if (folio->_mm_id_mapcount[0] >= 0) + goto out; + folio->_mm_id_mapcount[0] = -1; + folio_set_mm_id(folio, 0, MM_ID_DUMMY); + } else if (folio_mm_id(folio, 1) == mm_id) { + folio->_mm_id_mapcount[1] -= diff; + if (folio->_mm_id_mapcount[1] >= 0) + goto out; + folio->_mm_id_mapcount[1] = -1; + folio_set_mm_id(folio, 1, MM_ID_DUMMY); + } + + /* + * If one MM slot owns all mappings, the folio is mapped exclusively. + * Note that if the folio is now unmapped (new_mapcount_val == -1), both + * slots must be free (mapcount == -1), and we'll also mark it as + * exclusive. + */ + if (folio->_mm_id_mapcount[0] == new_mapcount_val || + folio->_mm_id_mapcount[1] == new_mapcount_val) + folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT; +out: + folio_unlock_large_mapcount(folio); + return new_mapcount_val + 1; +} +#define folio_sub_large_mapcount folio_sub_return_large_mapcount +#else /* !CONFIG_MM_ID */ +/* + * See __folio_rmap_sanity_checks(), we might map large folios even without + * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now. + */ +static inline void folio_set_large_mapcount(struct folio *folio, int mapcount, + struct vm_area_struct *vma) +{ + /* Note: mapcounts start at -1. */ + atomic_set(&folio->_large_mapcount, mapcount - 1); +} + +static inline void folio_add_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_add(diff, &folio->_large_mapcount); +} + +static inline int folio_add_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} + +static inline void folio_sub_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + atomic_sub(diff, &folio->_large_mapcount); +} + +static inline int folio_sub_return_large_mapcount(struct folio *folio, + int diff, struct vm_area_struct *vma) +{ + BUILD_BUG(); +} +#endif /* CONFIG_MM_ID */ + +#define folio_inc_large_mapcount(folio, vma) \ + folio_add_large_mapcount(folio, 1, vma) +#define folio_inc_return_large_mapcount(folio, vma) \ + folio_add_return_large_mapcount(folio, 1, vma) +#define folio_dec_large_mapcount(folio, vma) \ + folio_sub_large_mapcount(folio, 1, vma) +#define folio_dec_return_large_mapcount(folio, vma) \ + folio_sub_return_large_mapcount(folio, 1, vma) + /* RMAP flags, currently only relevant for some anon rmap operations. */ typedef int __bitwise rmap_t; @@ -192,6 +401,7 @@ typedef int __bitwise rmap_t; enum rmap_level { RMAP_LEVEL_PTE = 0, RMAP_LEVEL_PMD, + RMAP_LEVEL_PUD, }; static inline void __folio_rmap_sanity_checks(const struct folio *folio, @@ -228,6 +438,14 @@ static inline void __folio_rmap_sanity_checks(const struct folio *folio, VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio); VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio); break; + case RMAP_LEVEL_PUD: + /* + * Assume that we are creating a single "entire" mapping of the + * folio. + */ + VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio); + VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio); + break; default: VM_WARN_ON_ONCE(true); } @@ -251,12 +469,16 @@ void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, folio_add_file_rmap_ptes(folio, page, 1, vma) void folio_add_file_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_add_file_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_remove_rmap_pte(folio, page, vma) \ folio_remove_rmap_ptes(folio, page, 1, vma) void folio_remove_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *); +void folio_remove_rmap_pud(struct folio *, struct page *, + struct vm_area_struct *); void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address, rmap_t flags); @@ -322,7 +544,8 @@ static inline void hugetlb_remove_rmap(struct folio *folio) } static __always_inline void __folio_dup_file_rmap(struct folio *folio, - struct page *page, int nr_pages, enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + enum rmap_level level) { const int orig_nr_pages = nr_pages; @@ -335,14 +558,17 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, break; } - do { - atomic_inc(&page->_mapcount); - } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) { + do { + atomic_inc(&page->_mapcount); + } while (page++, --nr_pages > 0); + } + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; } } @@ -352,45 +578,47 @@ static __always_inline void __folio_dup_file_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, nr_pages, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, nr_pages, dst_vma, RMAP_LEVEL_PTE); } static __always_inline void folio_dup_file_rmap_pte(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { - __folio_dup_file_rmap(folio, page, 1, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, 1, dst_vma, RMAP_LEVEL_PTE); } /** * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) * * The caller needs to hold the page table lock. */ static inline void folio_dup_file_rmap_pmd(struct folio *folio, - struct page *page) + struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, RMAP_LEVEL_PTE); + __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, RMAP_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif } static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma, - enum rmap_level level) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma, enum rmap_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; @@ -432,18 +660,20 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, do { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); - atomic_inc(&page->_mapcount); + if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) + atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); - atomic_add(orig_nr_pages, &folio->_large_mapcount); + folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case RMAP_LEVEL_PMD: + case RMAP_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; ClearPageAnonExclusive(page); } atomic_inc(&folio->_entire_mapcount); - atomic_inc(&folio->_large_mapcount); + folio_inc_large_mapcount(folio, dst_vma); break; } return 0; @@ -455,6 +685,7 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mappings are duplicated * * The page range of the folio is defined by [page, page + nr_pages) @@ -473,16 +704,18 @@ static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio, - struct page *page, int nr_pages, struct vm_area_struct *src_vma) + struct page *page, int nr_pages, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, nr_pages, src_vma, - RMAP_LEVEL_PTE); + return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma, + src_vma, RMAP_LEVEL_PTE); } static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { - return __folio_try_dup_anon_rmap(folio, page, 1, src_vma, + return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma, RMAP_LEVEL_PTE); } @@ -491,6 +724,7 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * of a folio * @folio: The folio to duplicate the mapping of * @page: The first page to duplicate the mapping of + * @dst_vma: The destination vm area * @src_vma: The vm area from which the mapping is duplicated * * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) @@ -509,11 +743,12 @@ static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio, * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise. */ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio, - struct page *page, struct vm_area_struct *src_vma) + struct page *page, struct vm_area_struct *dst_vma, + struct vm_area_struct *src_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, src_vma, - RMAP_LEVEL_PMD); + return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma, + src_vma, RMAP_LEVEL_PMD); #else WARN_ON_ONCE(true); return -EBUSY; @@ -663,9 +898,8 @@ int folio_referenced(struct folio *, int is_locked, void try_to_migrate(struct folio *folio, enum ttu_flags flags); void try_to_unmap(struct folio *, enum ttu_flags flags); -int make_device_exclusive_range(struct mm_struct *mm, unsigned long start, - unsigned long end, struct page **pages, - void *arg); +struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr, + void *owner, struct folio **foliop); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) @@ -739,6 +973,9 @@ unsigned long page_address_in_vma(const struct folio *folio, */ int folio_mkclean(struct folio *); +int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff, + unsigned long pfn, unsigned long nr_pages); + int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, struct vm_area_struct *vma); diff --git a/include/linux/rolling_buffer.h b/include/linux/rolling_buffer.h new file mode 100644 index 000000000000..ac15b1ffdd83 --- /dev/null +++ b/include/linux/rolling_buffer.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Rolling buffer of folios + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#ifndef _ROLLING_BUFFER_H +#define _ROLLING_BUFFER_H + +#include <linux/folio_queue.h> +#include <linux/uio.h> + +/* + * Rolling buffer. Whilst the buffer is live and in use, folios and folio + * queue segments can be added to one end by one thread and removed from the + * other end by another thread. The buffer isn't allowed to be empty; it must + * always have at least one folio_queue in it so that neither side has to + * modify both queue pointers. + * + * The iterator in the buffer is extended as buffers are inserted. It can be + * snapshotted to use a segment of the buffer. + */ +struct rolling_buffer { + struct folio_queue *head; /* Producer's insertion point */ + struct folio_queue *tail; /* Consumer's removal point */ + struct iov_iter iter; /* Iterator tracking what's left in the buffer */ + u8 next_head_slot; /* Next slot in ->head */ + u8 first_tail_slot; /* First slot in ->tail */ +}; + +/* + * Snapshot of a rolling buffer. + */ +struct rolling_buffer_snapshot { + struct folio_queue *curr_folioq; /* Queue segment in which current folio resides */ + unsigned char curr_slot; /* Folio currently being read */ + unsigned char curr_order; /* Order of folio */ +}; + +/* Marks to store per-folio in the internal folio_queue structs. */ +#define ROLLBUF_MARK_1 BIT(0) +#define ROLLBUF_MARK_2 BIT(1) + +int rolling_buffer_init(struct rolling_buffer *roll, unsigned int rreq_id, + unsigned int direction); +int rolling_buffer_make_space(struct rolling_buffer *roll); +ssize_t rolling_buffer_load_from_ra(struct rolling_buffer *roll, + struct readahead_control *ractl, + struct folio_batch *put_batch); +ssize_t rolling_buffer_append(struct rolling_buffer *roll, struct folio *folio, + unsigned int flags); +struct folio_queue *rolling_buffer_delete_spent(struct rolling_buffer *roll); +void rolling_buffer_clear(struct rolling_buffer *roll); + +static inline void rolling_buffer_advance(struct rolling_buffer *roll, size_t amount) +{ + iov_iter_advance(&roll->iter, amount); +} + +#endif /* _ROLLING_BUFFER_H */ diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 90d8e4475f80..fb7ab9165645 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -184,13 +184,9 @@ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *, int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len); int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); -int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len); int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst); -int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst, - void *data, int len); __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait); @@ -271,15 +267,6 @@ static inline int rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len, } -static inline int rpmsg_send_offchannel(struct rpmsg_endpoint *ept, u32 src, - u32 dst, void *data, int len) -{ - /* This shouldn't be possible */ - WARN_ON(1); - - return -ENXIO; -} - static inline int rpmsg_trysend(struct rpmsg_endpoint *ept, void *data, int len) { /* This shouldn't be possible */ @@ -297,15 +284,6 @@ static inline int rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data, return -ENXIO; } -static inline int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, - u32 dst, void *data, int len) -{ - /* This shouldn't be possible */ - WARN_ON(1); - - return -ENXIO; -} - static inline __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp, poll_table *wait) { diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 3f4d315aaec9..95da051fb155 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -170,6 +170,7 @@ struct rtc_device { /* useful timestamps */ #define RTC_TIMESTAMP_BEGIN_0000 -62167219200ULL /* 0000-01-01 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_1900 -2208988800LL /* 1900-01-01 00:00:00 */ +#define RTC_TIMESTAMP_EPOCH_GPS 315964800LL /* 1980-01-06 00:00:00 */ #define RTC_TIMESTAMP_BEGIN_2000 946684800LL /* 2000-01-01 00:00:00 */ #define RTC_TIMESTAMP_END_2063 2966371199LL /* 2063-12-31 23:59:59 */ #define RTC_TIMESTAMP_END_2079 3471292799LL /* 2079-12-31 23:59:59 */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 14b88f551920..ea39dd23a197 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -43,6 +43,7 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +extern int rtnl_lock_interruptible(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); @@ -78,7 +79,7 @@ static inline bool lockdep_rtnl_is_held(void) * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit + * Return: the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ @@ -102,6 +103,7 @@ void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_trylock(struct net *net); +int rtnl_net_lock_killable(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); bool rtnl_net_is_locked(struct net *net); @@ -138,6 +140,11 @@ static inline int rtnl_net_trylock(struct net *net) return rtnl_trylock(); } +static inline int rtnl_net_lock_killable(struct net *net) +{ + return rtnl_lock_killable(); +} + static inline void ASSERT_RTNL_NET(struct net *net) { ASSERT_RTNL(); @@ -178,6 +185,12 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); +/* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */ +struct ndo_fdb_dump_context { + unsigned long ifindex; + unsigned long fdb_idx; +}; + extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, @@ -227,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } -void netdev_set_operstate(struct net_device *dev, int newstate); +void netif_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h index 4612ef09a0c7..3b4c36705a9b 100644 --- a/include/linux/rtsx_pci.h +++ b/include/linux/rtsx_pci.h @@ -1312,8 +1312,6 @@ void rtsx_pci_add_cmd(struct rtsx_pcr *pcr, u8 cmd_type, u16 reg_addr, u8 mask, u8 data); void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); -int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, - int num_sg, bool read, int timeout); int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read); void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, diff --git a/include/linux/rv.h b/include/linux/rv.h index 8883b41d88ec..3452b5e4b29e 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -7,7 +7,7 @@ #ifndef _LINUX_RV_H #define _LINUX_RV_H -#define MAX_DA_NAME_LEN 24 +#define MAX_DA_NAME_LEN 32 #ifdef CONFIG_RV /* @@ -56,7 +56,7 @@ struct rv_monitor { bool rv_monitoring_on(void); int rv_unregister_monitor(struct rv_monitor *monitor); -int rv_register_monitor(struct rv_monitor *monitor); +int rv_register_monitor(struct rv_monitor *monitor, struct rv_monitor *parent); int rv_get_task_monitor_slot(void); void rv_put_task_monitor_slot(int slot); diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 189140bf11fc..4adf4b364fcd 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -213,12 +213,12 @@ int sbitmap_get(struct sbitmap *sb); * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, * limiting the depth used from each word. * @sb: Bitmap to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. * * This rather specific operation allows for having multiple users with * different allocation limits. E.g., there can be a high-priority class that * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() - * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority * class can only allocate half of the total bits in the bitmap, preventing it * from starving out the high-priority class. * @@ -478,7 +478,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, * sbitmap_queue, limiting the depth used from each word, with preemption * already disabled. * @sbq: Bitmap queue to allocate from. - * @shallow_depth: The maximum number of bits to allocate from a single word. + * @shallow_depth: The maximum number of bits to allocate from the queue. * See sbitmap_get_shallow(). * * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index d836e7440ee8..6f8a4965f9b9 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -95,6 +95,28 @@ static inline bool sg_is_last(struct scatterlist *sg) } /** + * sg_next - return the next scatterlist entry in a list + * @sg: The current sg entry + * + * Description: + * Usually the next entry will be @sg + 1, but if this sg element is part + * of a chained scatterlist, it could jump to the start of a new + * scatterlist array. + * + **/ +static inline struct scatterlist *sg_next(struct scatterlist *sg) +{ + if (sg_is_last(sg)) + return NULL; + + sg++; + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + + return sg; +} + +/** * sg_assign_page - Assign a given page to an SG entry * @sg: SG entry * @page: The page @@ -232,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg, * @sgl: Second scatterlist * * Description: - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Links @prv and @sgl together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, @@ -418,7 +440,6 @@ static inline void sg_init_marker(struct scatterlist *sgl, int sg_nents(struct scatterlist *sg); int sg_nents_for_len(struct scatterlist *sg, u64 len); -struct scatterlist *sg_next(struct scatterlist *); struct scatterlist *sg_last(struct scatterlist *s, unsigned int); void sg_init_table(struct scatterlist *, unsigned int); void sg_init_one(struct scatterlist *, const void *, unsigned int); @@ -671,6 +692,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) #define SG_MITER_ATOMIC (1 << 0) /* use kmap_atomic */ #define SG_MITER_TO_SG (1 << 1) /* flush back to phys on unmap */ #define SG_MITER_FROM_SG (1 << 2) /* nop */ +#define SG_MITER_LOCAL (1 << 3) /* use kmap_local */ struct sg_mapping_iter { /* the following three fields can be accessed directly */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 949b53e0accf..aa9c5be7a632 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -44,8 +44,8 @@ #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> -#include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> +#include <linux/tracepoint-defs.h> #include <asm/kmap_size.h> /* task_struct member predeclarations (sorted alphabetically): */ @@ -65,6 +65,7 @@ struct mempolicy; struct nameidata; struct nsproxy; struct perf_event_context; +struct perf_ctx_data; struct pid_namespace; struct pipe_inode_info; struct rcu_node; @@ -186,6 +187,12 @@ struct user_event_mm; # define debug_rtlock_wait_restore_state() do { } while (0) #endif +#define trace_set_current_state(state_value) \ + do { \ + if (tracepoint_enabled(sched_set_state_tp)) \ + __trace_set_current_state(state_value); \ + } while (0) + /* * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to @@ -226,12 +233,14 @@ struct user_event_mm; #define __set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ + trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ } while (0) #define set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ + trace_set_current_state(state_value); \ smp_store_mb(current->__state, (state_value)); \ } while (0) @@ -247,6 +256,7 @@ struct user_event_mm; \ raw_spin_lock_irqsave(¤t->pi_lock, flags); \ debug_special_state_change((state_value)); \ + trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ } while (0) @@ -282,6 +292,7 @@ struct user_event_mm; raw_spin_lock(¤t->pi_lock); \ current->saved_state = current->__state; \ debug_rtlock_wait_set_state(); \ + trace_set_current_state(TASK_RTLOCK_WAIT); \ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ raw_spin_unlock(¤t->pi_lock); \ } while (0); @@ -291,6 +302,7 @@ struct user_event_mm; lockdep_assert_irqs_disabled(); \ raw_spin_lock(¤t->pi_lock); \ debug_rtlock_wait_restore_state(); \ + trace_set_current_state(current->saved_state); \ WRITE_ONCE(current->__state, current->saved_state); \ current->saved_state = TASK_RUNNING; \ raw_spin_unlock(¤t->pi_lock); \ @@ -327,6 +339,10 @@ extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); +/* wrapper function to trace from this header file */ +DECLARE_TRACEPOINT(sched_set_state_tp); +extern void __trace_set_current_state(int state_value); + /** * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode @@ -382,6 +398,11 @@ enum uclamp_id { #ifdef CONFIG_SMP extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; +extern void sched_domains_mutex_lock(void); +extern void sched_domains_mutex_unlock(void); +#else +static inline void sched_domains_mutex_lock(void) { } +static inline void sched_domains_mutex_unlock(void) { } #endif struct sched_param { @@ -398,6 +419,12 @@ struct sched_info { /* Time spent waiting on a runqueue: */ unsigned long long run_delay; + /* Max time spent waiting on a runqueue: */ + unsigned long long max_run_delay; + + /* Min time spent waiting on a runqueue: */ + unsigned long long min_run_delay; + /* Timestamps: */ /* When did we last run on a CPU? */ @@ -1016,6 +1043,7 @@ struct task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif + unsigned in_nf_duplicate:1; #ifdef CONFIG_PREEMPT_RT struct netdev_xmit net_xmit; #endif @@ -1211,6 +1239,14 @@ struct task_struct { struct mutex_waiter *blocked_on; #endif +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + /* + * Encoded lock address causing task block (lower 2 bits = type from + * <linux/hung_task.h>). Accessed via hung_task_*() helpers. + */ + unsigned long blocker; +#endif + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP int non_block_count; #endif @@ -1305,6 +1341,7 @@ struct task_struct { struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; + struct perf_ctx_data __rcu *perf_ctx_data; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; @@ -1375,6 +1412,15 @@ struct task_struct { * with respect to preemption. */ unsigned long rseq_event_mask; +# ifdef CONFIG_DEBUG_RSEQ + /* + * This is a place holder to save a copy of the rseq fields for + * validation of read-only fields. The struct rseq has a + * variable-length array at the end, so it cannot be used + * directly. Reserve a size large enough for the known fields. + */ + char rseq_fields[sizeof(struct rseq)]; +# endif #endif #ifdef CONFIG_SCHED_MM_CID @@ -1604,22 +1650,15 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif - /* - * New fields for task_struct should be added above here, so that - * they are included in the randomized portion of task_struct. - */ - randomized_struct_fields_end - /* CPU-specific state of this task: */ struct thread_struct thread; /* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. */ -}; + randomized_struct_fields_end +} __attribute__ ((aligned (64))); #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) @@ -1686,7 +1725,7 @@ extern struct pid *cad_pid; #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ -#define PF__HOLE__00010000 0x00010000 +#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ @@ -1945,11 +1984,10 @@ static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); - -static inline void set_task_comm(struct task_struct *tsk, const char *from) -{ - __set_task_comm(tsk, from, false); -} +#define set_task_comm(tsk, from) ({ \ + BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN); \ + __set_task_comm(tsk, from, false); \ +}) /* * - Why not use task_lock()? @@ -2048,9 +2086,6 @@ extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -void sched_dynamic_klp_enable(void); -void sched_dynamic_klp_disable(void); - DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) @@ -2071,7 +2106,6 @@ static __always_inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return __cond_resched(); } @@ -2081,7 +2115,6 @@ static inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return 0; } diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 3a912ab42bb5..f9aabbc9d22e 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -34,7 +34,11 @@ static inline bool dl_time_before(u64 a, u64 b) struct root_domain; extern void dl_add_task_root_domain(struct task_struct *p); extern void dl_clear_root_domain(struct root_domain *rd); +extern void dl_clear_root_domain_cpu(int cpu); #endif /* CONFIG_SMP */ +extern u64 dl_cookie; +extern bool dl_bw_visited(int cpu, u64 cookie); + #endif /* _LINUX_SCHED_DEADLINE_H */ diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index b5035afa2396..35ed4577a6cc 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -35,12 +35,10 @@ extern void show_stack(struct task_struct *task, unsigned long *sp, extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_SCHED_DEBUG struct seq_file; extern void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); -#endif /* Attach to any functions which should be ignored in wchan output. */ #define __sched __section(".sched.text") diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1d70a9867fb1..f7545430a548 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -146,6 +146,7 @@ struct sched_ext_entity { u32 weight; s32 sticky_cpu; s32 holding_cpu; + s32 selected_cpu; u32 kf_mask; /* see scx_kf_mask above */ struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ atomic_long_t ops_state; diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h index 412cdaba33eb..17e04859b9a4 100644 --- a/include/linux/sched/hotplug.h +++ b/include/linux/sched/hotplug.h @@ -18,10 +18,6 @@ extern int sched_cpu_dying(unsigned int cpu); # define sched_cpu_dying NULL #endif -#ifdef CONFIG_HOTPLUG_CPU -extern void idle_task_exit(void); -#else static inline void idle_task_exit(void) {} -#endif #endif /* _LINUX_SCHED_HOTPLUG_H */ diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h index e670ac282333..439f6029d3b9 100644 --- a/include/linux/sched/idle.h +++ b/include/linux/sched/idle.h @@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void) return unlikely(tif_need_resched()); } +static __always_inline void current_clr_polling(void) +{ + __current_clr_polling(); + + /* + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. + * Once the bit is cleared, we'll get IPIs with every new + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also + * fold. + */ + smp_mb__after_atomic(); /* paired with resched_curr() */ + + preempt_fold_need_resched(); +} + #else static inline void __current_set_polling(void) { } static inline void __current_clr_polling(void) { } @@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void) { return unlikely(tif_need_resched()); } -#endif static __always_inline void current_clr_polling(void) { __current_clr_polling(); - /* - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit. - * Once the bit is cleared, we'll get IPIs with every new - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also - * fold. - */ smp_mb(); /* paired with resched_curr() */ preempt_fold_need_resched(); } +#endif #endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 2b461129d1fa..d8501f4709b5 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -7,16 +7,21 @@ #include <linux/tick.h> enum hk_type { - HK_TYPE_TIMER, - HK_TYPE_RCU, - HK_TYPE_MISC, - HK_TYPE_SCHED, - HK_TYPE_TICK, HK_TYPE_DOMAIN, - HK_TYPE_WQ, HK_TYPE_MANAGED_IRQ, - HK_TYPE_KTHREAD, - HK_TYPE_MAX + HK_TYPE_KERNEL_NOISE, + HK_TYPE_MAX, + + /* + * The following housekeeping types are only set by the nohz_full + * boot commandline option. So they can share the same value. + */ + HK_TYPE_TICK = HK_TYPE_KERNEL_NOISE, + HK_TYPE_TIMER = HK_TYPE_KERNEL_NOISE, + HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, + HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, + HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, + HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 928a626725e6..b13474825130 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -531,6 +531,13 @@ enum { static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm) { + /* + * The atomic_read() below prevents CSE. The following should + * help the compiler generate more efficient code on architectures + * where sync_core_before_usermode() is a no-op. + */ + if (!IS_ENABLED(CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE)) + return; if (current->mm != mm) return; if (likely(!(atomic_read(&mm->membarrier_state) & diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index d5d03d919df8..1ef1edbaaf79 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -136,7 +136,8 @@ struct signal_struct { #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ - unsigned int next_posix_timer_id; + unsigned int timer_create_restore_ids:1; + atomic_t next_posix_timer_id; struct hlist_head posix_timers; struct hlist_head ignored_posix_timers; diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h index fb1e295e7e63..166b19af956f 100644 --- a/include/linux/sched/smt.h +++ b/include/linux/sched/smt.h @@ -12,7 +12,7 @@ static __always_inline bool sched_smt_active(void) return static_branch_likely(&sched_smt_present); } #else -static inline bool sched_smt_active(void) { return false; } +static __always_inline bool sched_smt_active(void) { return false; } #endif void arch_smt_update(void); diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index cffad65bdc6a..1fab7e9043a3 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -53,7 +53,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct * When the stack grows up, this is the highest address. * Beyond that position, we corrupt data on the next page. */ -static inline unsigned long *end_of_stack(struct task_struct *p) +static inline unsigned long *end_of_stack(const struct task_struct *p) { #ifdef CONFIG_STACK_GROWSUP return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1; @@ -106,7 +106,6 @@ static inline unsigned long stack_not_used(struct task_struct *p) #endif extern void set_task_stack_end_magic(struct task_struct *tsk); -#ifndef __HAVE_ARCH_KSTACK_END static inline int kstack_end(void *addr) { /* Reliable end of stack detection: @@ -114,6 +113,5 @@ static inline int kstack_end(void *addr) */ return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*))); } -#endif #endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 4237daa5ac7a..198bb5cc1774 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -25,16 +25,12 @@ enum { }; #undef SD_FLAG -#ifdef CONFIG_SCHED_DEBUG - struct sd_flag_debug { unsigned int meta_flags; char *name; }; extern const struct sd_flag_debug sd_flag_debug[]; -#endif - #ifdef CONFIG_SCHED_SMT static inline int cpu_smt_flags(void) { @@ -114,7 +110,10 @@ struct sched_domain { unsigned int lb_count[CPU_MAX_IDLE_TYPES]; unsigned int lb_failed[CPU_MAX_IDLE_TYPES]; unsigned int lb_balanced[CPU_MAX_IDLE_TYPES]; - unsigned int lb_imbalance[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_load[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_util[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_task[CPU_MAX_IDLE_TYPES]; + unsigned int lb_imbalance_misfit[CPU_MAX_IDLE_TYPES]; unsigned int lb_gained[CPU_MAX_IDLE_TYPES]; unsigned int lb_hot_gained[CPU_MAX_IDLE_TYPES]; unsigned int lb_nobusyg[CPU_MAX_IDLE_TYPES]; @@ -140,9 +139,7 @@ struct sched_domain { unsigned int ttwu_move_affine; unsigned int ttwu_move_balance; #endif -#ifdef CONFIG_SCHED_DEBUG char *name; -#endif union { void *private; /* used during construction */ struct rcu_head rcu; /* used during destruction */ @@ -165,10 +162,6 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) return to_cpumask(sd->span); } -extern void partition_sched_domains_locked(int ndoms_new, - cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new); - extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new); @@ -198,30 +191,20 @@ struct sched_domain_topology_level { int flags; int numa_level; struct sd_data data; -#ifdef CONFIG_SCHED_DEBUG char *name; -#endif }; extern void __init set_sched_topology(struct sched_domain_topology_level *tl); +extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio); + -#ifdef CONFIG_SCHED_DEBUG # define SD_INIT_NAME(type) .name = #type -#else -# define SD_INIT_NAME(type) -#endif #else /* CONFIG_SMP */ struct sched_domain_attr; static inline void -partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[], - struct sched_domain_attr *dattr_new) -{ -} - -static inline void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { @@ -242,6 +225,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu) return true; } +static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) +{ +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h index 06cd8fb2f409..0f28b4623ad4 100644 --- a/include/linux/sched/wake_q.h +++ b/include/linux/sched/wake_q.h @@ -63,4 +63,38 @@ extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); +/* Spin unlock helpers to unlock and call wake_up_q with preempt disabled */ +static inline +void raw_spin_unlock_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock(lock); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} + +static inline +void raw_spin_unlock_irq_wake(raw_spinlock_t *lock, struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock_irq(lock); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} + +static inline +void raw_spin_unlock_irqrestore_wake(raw_spinlock_t *lock, unsigned long flags, + struct wake_q_head *wake_q) +{ + guard(preempt)(); + raw_spin_unlock_irqrestore(lock, flags); + if (wake_q) { + wake_up_q(wake_q); + wake_q_init(wake_q); + } +} #endif /* _LINUX_SCHED_WAKE_Q_H */ diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 066216f1357a..27bd372cbfb1 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -11,12 +11,16 @@ #include <linux/bitfield.h> #include <linux/device.h> #include <linux/notifier.h> +#include <linux/scmi_protocol.h> #include <linux/types.h> -enum scmi_nxp_protocol { - SCMI_PROTOCOL_IMX_BBM = 0x81, - SCMI_PROTOCOL_IMX_MISC = 0x84, -}; +#define SCMI_PROTOCOL_IMX_LMM 0x80 +#define SCMI_PROTOCOL_IMX_BBM 0x81 +#define SCMI_PROTOCOL_IMX_CPU 0x82 +#define SCMI_PROTOCOL_IMX_MISC 0x84 + +#define SCMI_IMX_VENDOR "NXP" +#define SCMI_IMX_SUBVENDOR "IMX" struct scmi_imx_bbm_proto_ops { int (*rtc_time_set)(const struct scmi_protocol_handle *ph, u32 id, @@ -56,4 +60,43 @@ struct scmi_imx_misc_proto_ops { int (*misc_ctrl_req_notify)(const struct scmi_protocol_handle *ph, u32 ctrl_id, u32 evt_id, u32 flags); }; + +/* See LMM_ATTRIBUTES in imx95.rst */ +#define LMM_ID_DISCOVER 0xFFFFFFFFU +#define LMM_MAX_NAME 16 + +enum scmi_imx_lmm_state { + LMM_STATE_LM_OFF, + LMM_STATE_LM_ON, + LMM_STATE_LM_SUSPEND, + LMM_STATE_LM_POWERED, +}; + +struct scmi_imx_lmm_info { + u32 lmid; + enum scmi_imx_lmm_state state; + u32 errstatus; + u8 name[LMM_MAX_NAME]; +}; + +struct scmi_imx_lmm_proto_ops { + int (*lmm_power_boot)(const struct scmi_protocol_handle *ph, u32 lmid, + bool boot); + int (*lmm_info)(const struct scmi_protocol_handle *ph, u32 lmid, + struct scmi_imx_lmm_info *info); + int (*lmm_reset_vector_set)(const struct scmi_protocol_handle *ph, + u32 lmid, u32 cpuid, u32 flags, u64 vector); + int (*lmm_shutdown)(const struct scmi_protocol_handle *ph, u32 lmid, + u32 flags); +}; + +struct scmi_imx_cpu_proto_ops { + int (*cpu_reset_vector_set)(const struct scmi_protocol_handle *ph, + u32 cpuid, u64 vector, bool start, + bool boot, bool resume); + int (*cpu_start)(const struct scmi_protocol_handle *ph, u32 cpuid, + bool start); + int (*cpu_started)(const struct scmi_protocol_handle *ph, u32 cpuid, + bool *started); +}; #endif diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 6a4a3cec4638..923d68e07679 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -126,8 +126,17 @@ static inline unsigned int screen_info_video_type(const struct screen_info *si) return VIDEO_TYPE_CGA; } +static inline u32 __screen_info_vesapm_info_base(const struct screen_info *si) +{ + if (si->vesapm_seg < 0xc000) + return 0; + return (si->vesapm_seg << 4) + si->vesapm_off; +} + ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); +u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si); + #if defined(CONFIG_PCI) void screen_info_apply_fixups(void); struct pci_dev *screen_info_pci_dev(const struct screen_info *si); diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 836a7e200f39..6719949135c9 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -222,7 +222,6 @@ struct sctp_datahdr { __be16 stream; __be16 ssn; __u32 ppid; - /* __u8 payload[]; */ }; struct sctp_data_chunk { @@ -239,7 +238,6 @@ struct sctp_idatahdr { __u32 ppid; __be32 fsn; }; - __u8 payload[0]; }; struct sctp_idata_chunk { diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index e45531455d3b..9b959972bf4a 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -22,21 +22,17 @@ #include <linux/atomic.h> #include <asm/seccomp.h> +extern int __secure_computing(void); + #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER -extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { if (unlikely(test_syscall_work(SECCOMP))) - return __secure_computing(NULL); + return __secure_computing(); return 0; } #else extern void secure_computing_strict(int this_syscall); -static inline int __secure_computing(const struct seccomp_data *sd) -{ - secure_computing_strict(sd->nr); - return 0; -} #endif extern long prctl_get_seccomp(void); @@ -58,7 +54,7 @@ static inline int secure_computing(void) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif -static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } +static inline int __secure_computing(void) { return 0; } static inline long prctl_get_seccomp(void) { diff --git a/include/linux/security.h b/include/linux/security.h index cbdba435b798..dba349629229 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -226,6 +226,18 @@ extern unsigned long dac_mmap_min_addr; #endif /* + * A "security context" is the text representation of + * the information used by LSMs. + * This structure contains the string, its length, and which LSM + * it is useful for. + */ +struct lsm_context { + char *context; /* Provided by the module */ + u32 len; + int id; /* Identifies the module */ +}; + +/* * Values used in the task_security_ops calls */ /* setuid or setgid, id0 == uid or gid */ @@ -378,8 +390,8 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, - const char **xattr_name, void **ctx, - u32 *ctxlen); + const char **xattr_name, + struct lsm_context *lsmcxt); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, @@ -551,16 +563,15 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx, int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); int security_setprocattr(int lsmid, const char *name, void *value, size_t size); -int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); -int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); -int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); +int security_secid_to_secctx(u32 secid, struct lsm_context *cp); +int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); -void security_release_secctx(char *secdata, u32 seclen); +void security_release_secctx(struct lsm_context *cp); void security_inode_invalidate_secctx(struct inode *inode); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); -int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); +int security_inode_getsecctx(struct inode *inode, struct lsm_context *cp); int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); @@ -852,8 +863,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, - u32 *ctxlen) + struct lsm_context *lsmcxt) { return -EOPNOTSUPP; } @@ -1516,24 +1526,18 @@ static inline int security_setprocattr(int lsmid, char *name, void *value, return -EINVAL; } -static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - static inline int security_ismaclabel(const char *name) { return 0; } -static inline int security_secid_to_secctx(u32 secid, char **secdata, - u32 *seclen) +static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp) { return -EOPNOTSUPP; } static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) { return -EOPNOTSUPP; } @@ -1545,7 +1549,7 @@ static inline int security_secctx_to_secid(const char *secdata, return -EOPNOTSUPP; } -static inline void security_release_secctx(char *secdata, u32 seclen) +static inline void security_release_secctx(struct lsm_context *cp) { } @@ -1561,7 +1565,8 @@ static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 { return -EOPNOTSUPP; } -static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) +static inline int security_inode_getsecctx(struct inode *inode, + struct lsm_context *cp) { return -EOPNOTSUPP; } @@ -1618,6 +1623,7 @@ static inline int security_watch_key(struct key *key) #ifdef CONFIG_SECURITY_NETWORK +int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk); int security_unix_may_send(struct socket *sock, struct socket *other); int security_socket_create(int family, int type, int protocol, int kern); @@ -1673,6 +1679,11 @@ int security_sctp_assoc_established(struct sctp_association *asoc, int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk); #else /* CONFIG_SECURITY_NETWORK */ +static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) @@ -2238,14 +2249,14 @@ struct bpf_map; struct bpf_prog; struct bpf_token; #ifdef CONFIG_SECURITY -extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); +extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size, bool kernel); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); extern int security_bpf_prog(struct bpf_prog *prog); extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token); + struct bpf_token *token, bool kernel); extern void security_bpf_map_free(struct bpf_map *map); extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token); + struct bpf_token *token, bool kernel); extern void security_bpf_prog_free(struct bpf_prog *prog); extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr, const struct path *path); @@ -2254,7 +2265,7 @@ extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cm extern int security_bpf_token_capable(const struct bpf_token *token, int cap); #else static inline int security_bpf(int cmd, union bpf_attr *attr, - unsigned int size) + unsigned int size, bool kernel) { return 0; } @@ -2270,7 +2281,7 @@ static inline int security_bpf_prog(struct bpf_prog *prog) } static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) { return 0; } @@ -2279,7 +2290,7 @@ static inline void security_bpf_map_free(struct bpf_map *map) { } static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr, - struct bpf_token *token) + struct bpf_token *token, bool kernel) { return 0; } @@ -2313,14 +2324,13 @@ struct perf_event_attr; struct perf_event; #ifdef CONFIG_SECURITY -extern int security_perf_event_open(struct perf_event_attr *attr, int type); +extern int security_perf_event_open(int type); extern int security_perf_event_alloc(struct perf_event *event); extern void security_perf_event_free(struct perf_event *event); extern int security_perf_event_read(struct perf_event *event); extern int security_perf_event_write(struct perf_event *event); #else -static inline int security_perf_event_open(struct perf_event_attr *attr, - int type) +static inline int security_perf_event_open(int type) { return 0; } @@ -2351,6 +2361,7 @@ static inline int security_perf_event_write(struct perf_event *event) extern int security_uring_override_creds(const struct cred *new); extern int security_uring_sqpoll(void); extern int security_uring_cmd(struct io_uring_cmd *ioucmd); +extern int security_uring_allowed(void); #else static inline int security_uring_override_creds(const struct cred *new) { @@ -2364,6 +2375,10 @@ static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) { return 0; } +static inline int security_uring_allowed(void) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #endif /* CONFIG_IO_URING */ diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 04655faadc2d..89706157e622 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -16,13 +16,25 @@ struct semaphore { raw_spinlock_t lock; unsigned int count; struct list_head wait_list; + +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER + unsigned long last_holder; +#endif }; +#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER +#define __LAST_HOLDER_SEMAPHORE_INITIALIZER \ + , .last_holder = 0UL +#else +#define __LAST_HOLDER_SEMAPHORE_INITIALIZER +#endif + #define __SEMAPHORE_INITIALIZER(name, n) \ { \ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ .count = n, \ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ + .wait_list = LIST_HEAD_INIT((name).wait_list) \ + __LAST_HOLDER_SEMAPHORE_INITIALIZER \ } /* @@ -47,5 +59,6 @@ extern int __must_check down_killable(struct semaphore *sem); extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); extern void up(struct semaphore *sem); +extern unsigned long sem_last_holder(struct semaphore *sem); #endif /* __LINUX_SEMAPHORE_H */ diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index fe41da005970..52791e070506 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -167,8 +167,8 @@ extern int seq_buf_hex_dump(struct seq_buf *s, const char *prefix_str, const void *buf, size_t len, bool ascii); #ifdef CONFIG_BINARY_PRINTF -extern int -seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); +__printf(2, 0) +int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); #endif void seq_buf_do_printk(struct seq_buf *s, const char *lvl); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 2fb266ea69fa..d6ebf0596510 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -181,6 +181,7 @@ int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); #ifdef CONFIG_BINARY_PRINTF +__printf(2, 0) void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary); #endif diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5298765d6ca4..5ce48eab7a2a 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) ({ \ unsigned __seq; \ \ - while ((__seq = seqprop_sequence(s)) & 1) \ + while (unlikely((__seq = seqprop_sequence(s)) & 1)) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ @@ -319,6 +319,29 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) }) /** + * raw_seqcount_try_begin() - begin a seqcount_t read critical section + * w/o lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * @start: count to be passed to read_seqcount_retry() + * + * Similar to raw_seqcount_begin(), except it enables eliding the critical + * section entirely if odd, instead of doing the speculation knowing it will + * fail. + * + * Useful when counter stabilization is more or less equivalent to taking + * the lock and there is a slowpath that does that. + * + * If true, start will be set to the (even) sequence count read. + * + * Return: true when a read critical section is started. + */ +#define raw_seqcount_try_begin(s, start) \ +({ \ + start = raw_read_seqcount(s); \ + !(start & 1); \ +}) + +/** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants diff --git a/include/linux/serdev.h b/include/linux/serdev.h index ff78efc1f60d..34562eb99931 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -84,7 +84,6 @@ enum serdev_parity { struct serdev_controller_ops { ssize_t (*write_buf)(struct serdev_controller *, const u8 *, size_t); void (*write_flush)(struct serdev_controller *); - int (*write_room)(struct serdev_controller *); int (*open)(struct serdev_controller *); void (*close)(struct serdev_controller *); void (*set_flow_control)(struct serdev_controller *, bool); @@ -212,7 +211,6 @@ int serdev_device_break_ctl(struct serdev_device *serdev, int break_state); void serdev_device_write_wakeup(struct serdev_device *); ssize_t serdev_device_write(struct serdev_device *, const u8 *, size_t, long); void serdev_device_write_flush(struct serdev_device *); -int serdev_device_write_room(struct serdev_device *); /* * serdev device driver functions @@ -273,10 +271,6 @@ static inline ssize_t serdev_device_write(struct serdev_device *sdev, return -ENODEV; } static inline void serdev_device_write_flush(struct serdev_device *sdev) {} -static inline int serdev_device_write_room(struct serdev_device *sdev) -{ - return 0; -} #define serdev_device_driver_register(x) #define serdev_device_driver_unregister(x) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index e0717c8393d7..144de7a7948d 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -161,8 +161,8 @@ struct uart_8250_port { void (*dl_write)(struct uart_8250_port *up, u32 value); struct uart_8250_em485 *em485; - void (*rs485_start_tx)(struct uart_8250_port *); - void (*rs485_stop_tx)(struct uart_8250_port *); + void (*rs485_start_tx)(struct uart_8250_port *up, bool toggle_ier); + void (*rs485_stop_tx)(struct uart_8250_port *up, bool toggle_ier); /* Serial port overrun backoff */ struct delayed_work overrun_backoff; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 743b4afaad4c..914b5e97e056 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -427,6 +427,18 @@ struct uart_icount { typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; +enum uart_iotype { + UPIO_UNKNOWN = -1, + UPIO_PORT = SERIAL_IO_PORT, /* 8b I/O port access */ + UPIO_HUB6 = SERIAL_IO_HUB6, /* Hub6 ISA card */ + UPIO_MEM = SERIAL_IO_MEM, /* driver-specific */ + UPIO_MEM32 = SERIAL_IO_MEM32, /* 32b little endian */ + UPIO_AU = SERIAL_IO_AU, /* Au1x00 and RT288x type IO */ + UPIO_TSI = SERIAL_IO_TSI, /* Tsi108/109 type IO */ + UPIO_MEM32BE = SERIAL_IO_MEM32BE, /* 32b big endian */ + UPIO_MEM16 = SERIAL_IO_MEM16, /* 16b little endian */ +}; + struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ @@ -469,23 +481,13 @@ struct uart_port { unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ - unsigned char iotype; /* io access style */ - -#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ -#define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ -#define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ -#define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ -#define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ -#define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ -#define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ -#define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ -#define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ - unsigned char quirks; /* internal quirks */ /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) + enum uart_iotype iotype; /* io access style */ + unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ @@ -1101,8 +1103,8 @@ static inline bool uart_console_registered(struct uart_port *port) struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); -int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, - char **options); +int uart_parse_earlycon(char *p, enum uart_iotype *iotype, + resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 0b273a7b9f01..5f03a39a26f7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -104,10 +104,11 @@ static inline bool shmem_mapping(struct address_space *mapping) return false; } #endif /* CONFIG_SHMEM */ -extern void shmem_unlock_mapping(struct address_space *mapping); -extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, +void shmem_unlock_mapping(struct address_space *mapping); +struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); +int shmem_writeout(struct folio *folio, struct writeback_control *wbc); +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/sizes.h b/include/linux/sizes.h index c3a00b967d18..49039494076f 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -23,17 +23,25 @@ #define SZ_4K 0x00001000 #define SZ_8K 0x00002000 #define SZ_16K 0x00004000 +#define SZ_24K 0x00006000 #define SZ_32K 0x00008000 #define SZ_64K 0x00010000 #define SZ_128K 0x00020000 +#define SZ_192K 0x00030000 #define SZ_256K 0x00040000 +#define SZ_384K 0x00060000 #define SZ_512K 0x00080000 #define SZ_1M 0x00100000 #define SZ_2M 0x00200000 +#define SZ_3M 0x00300000 #define SZ_4M 0x00400000 +#define SZ_6M 0x00600000 #define SZ_8M 0x00800000 +#define SZ_12M 0x00c00000 #define SZ_16M 0x01000000 +#define SZ_18M 0x01200000 +#define SZ_24M 0x01800000 #define SZ_32M 0x02000000 #define SZ_64M 0x04000000 #define SZ_128M 0x08000000 diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 58009fa66102..67a906702830 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -274,7 +274,6 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -struct ahash_request; struct net_device; struct scatterlist; struct pipe_inode_info; @@ -470,7 +469,7 @@ struct skb_shared_hwtstamps { /* Definitions for tx_flags in struct skb_shared_info */ enum { /* generate hardware time stamp */ - SKBTX_HW_TSTAMP = 1 << 0, + SKBTX_HW_TSTAMP_NOBPF = 1 << 0, /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, @@ -478,23 +477,26 @@ enum { /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, - /* generate hardware time stamp based on cycles if supported */ - SKBTX_HW_TSTAMP_USE_CYCLES = 1 << 3, - - /* generate wifi status information (where possible) */ - SKBTX_WIFI_STATUS = 1 << 4, + /* generate software time stamp on packet tx completion */ + SKBTX_COMPLETION_TSTAMP = 1 << 3, /* determine hardware time stamp based on time or cycles */ SKBTX_HW_TSTAMP_NETDEV = 1 << 5, /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, + + /* used for bpf extension when a bpf program is loaded */ + SKBTX_BPF = 1 << 7, }; +#define SKBTX_HW_TSTAMP (SKBTX_HW_TSTAMP_NOBPF | SKBTX_BPF) + #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ - SKBTX_SCHED_TSTAMP) + SKBTX_SCHED_TSTAMP | \ + SKBTX_BPF | \ + SKBTX_COMPLETION_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ - SKBTX_HW_TSTAMP_USE_CYCLES | \ SKBTX_ANY_SW_TSTAMP) /* Definitions for flags in struct skb_shared_info */ @@ -608,11 +610,19 @@ struct skb_shared_info { * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; - unsigned int xdp_frags_size; - /* Intermediate layers must ensure that destructor_arg - * remains valid until skb destructor */ - void * destructor_arg; + union { + struct { + u32 xdp_frags_size; + u32 xdp_frags_truesize; + }; + + /* + * Intermediate layers must ensure that destructor_arg + * remains valid until skb destructor. + */ + void *destructor_arg; + }; /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; @@ -695,6 +705,8 @@ enum { SKB_GSO_UDP_L4 = 1 << 17, SKB_GSO_FRAGLIST = 1 << 18, + + SKB_GSO_TCP_ACCECN = 1 << 19, }; #if BITS_PER_LONG > 32 @@ -1134,7 +1146,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) * skb_dst - returns skb dst_entry * @skb: buffer * - * Returns skb dst_entry, regardless of reference taken or not. + * Returns: skb dst_entry, regardless of reference taken or not. */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { @@ -1222,7 +1234,7 @@ static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) * skb_unref - decrement the skb's reference count * @skb: buffer * - * Returns true if we can free the skb. + * Returns: true if we can free the skb. */ static inline bool skb_unref(struct sk_buff *skb) { @@ -1307,6 +1319,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); void skb_attempt_defer_free(struct sk_buff *skb); +u32 napi_skb_cache_get_bulk(void **skbs, u32 n); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); struct sk_buff *slab_build_skb(void *data); @@ -1344,7 +1357,7 @@ struct sk_buff_fclones { * @sk: socket * @skb: buffer * - * Returns true if skb is a fast clone, and its clone is not freed. + * Returns: true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that didn't happen. */ @@ -1519,14 +1532,8 @@ void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); -__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - const void *data, int hlen_proto); - -static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, - int thoff, u8 ip_proto) -{ - return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); -} +__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + const void *data, int hlen_proto); void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, @@ -1699,13 +1706,16 @@ static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, - struct ubuf_info *uarg); + struct ubuf_info *uarg, bool devmem); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); +struct net_devmem_dmabuf_binding; + int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, - size_t length); + size_t length, + struct net_devmem_dmabuf_binding *binding); int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length); @@ -1713,12 +1723,14 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb, static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { - return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len); + return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len, + NULL); } int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, - struct ubuf_info *uarg); + struct ubuf_info *uarg, + struct net_devmem_dmabuf_binding *binding); /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) @@ -3021,6 +3033,29 @@ static inline void skb_reset_transport_header(struct sk_buff *skb) skb->transport_header = offset; } +/** + * skb_reset_transport_header_careful - conditionally reset transport header + * @skb: buffer to alter + * + * Hardened version of skb_reset_transport_header(). + * + * Returns: true if the operation was a success. + */ +static inline bool __must_check +skb_reset_transport_header_careful(struct sk_buff *skb) +{ + long offset = skb->data - skb->head; + + if (unlikely(offset != (typeof(skb->transport_header))offset)) + return false; + + if (unlikely(offset == (typeof(skb->transport_header))~0U)) + return false; + + skb->transport_header = offset; + return true; +} + static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { @@ -3516,7 +3551,7 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) * A page shouldn't be considered for reusing/recycling if it was allocated * under memory pressure or at a distant memory node. * - * Returns false if this page should be returned to page allocator, true + * Returns: false if this page should be returned to page allocator, true * otherwise. */ static inline bool dev_page_is_reusable(const struct page *page) @@ -3627,13 +3662,13 @@ static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, - struct bpf_prog *prog); + const struct bpf_prog *prog); /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. The page must already + * Returns: the address of the data within @frag. The page must already * be mapped. */ static inline void *skb_frag_address(const skb_frag_t *frag) @@ -3648,12 +3683,18 @@ static inline void *skb_frag_address(const skb_frag_t *frag) * skb_frag_address_safe - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. Checks that the page + * Returns: the address of the data within @frag. Checks that the page * is mapped and returns %NULL otherwise. */ static inline void *skb_frag_address_safe(const skb_frag_t *frag) { - void *ptr = page_address(skb_frag_page(frag)); + struct page *page = skb_frag_page(frag); + void *ptr; + + if (!page) + return NULL; + + ptr = page_address(page); if (unlikely(!ptr)) return NULL; @@ -3674,7 +3715,7 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto, bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** - * skb_frag_dma_map - maps a paged fragment via the DMA API + * __skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to * @frag: the paged fragment to map * @offset: the offset within the fragment (starting at the @@ -3684,15 +3725,40 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); * * Maps the page associated with @frag to @device. */ -static inline dma_addr_t skb_frag_dma_map(struct device *dev, - const skb_frag_t *frag, - size_t offset, size_t size, - enum dma_data_direction dir) +static inline dma_addr_t __skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) { + if (skb_frag_is_net_iov(frag)) { + return netmem_to_net_iov(frag->netmem)->dma_addr + offset + + frag->offset; + } return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } +#define skb_frag_dma_map(dev, frag, ...) \ + CONCATENATE(_skb_frag_dma_map, \ + COUNT_ARGS(__VA_ARGS__))(dev, frag, ##__VA_ARGS__) + +#define __skb_frag_dma_map1(dev, frag, offset, uf, uo) ({ \ + const skb_frag_t *uf = (frag); \ + size_t uo = (offset); \ + \ + __skb_frag_dma_map(dev, uf, uo, skb_frag_size(uf) - uo, \ + DMA_TO_DEVICE); \ +}) +#define _skb_frag_dma_map1(dev, frag, offset) \ + __skb_frag_dma_map1(dev, frag, offset, __UNIQUE_ID(frag_), \ + __UNIQUE_ID(offset_)) +#define _skb_frag_dma_map0(dev, frag) \ + _skb_frag_dma_map1(dev, frag, 0) +#define _skb_frag_dma_map2(dev, frag, offset, size) \ + __skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE) +#define _skb_frag_dma_map3(dev, frag, offset, size, dir) \ + __skb_frag_dma_map(dev, frag, offset, size, dir) + static inline struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { @@ -3836,25 +3902,6 @@ static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int l bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) __must_check; -static inline int skb_add_data(struct sk_buff *skb, - struct iov_iter *from, int copy) -{ - const int off = skb->len; - - if (skb->ip_summed == CHECKSUM_NONE) { - __wsum csum = 0; - if (csum_and_copy_from_iter_full(skb_put(skb, copy), copy, - &csum, from)) { - skb->csum = csum_block_add(skb->csum, csum, off); - return 0; - } - } else if (copy_from_iter_full(skb_put(skb, copy), copy, from)) - return 0; - - __skb_trim(skb, off); - return -EFAULT; -} - static inline bool skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { @@ -3890,7 +3937,7 @@ static inline int skb_linearize(struct sk_buff *skb) * skb_has_shared_frag - can any frag be overwritten * @skb: buffer to test * - * Return true if the skb has at least one frag that might be modified + * Return: true if the skb has at least one frag that might be modified * by an external entity (as in vmsplice()/sendfile()) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) @@ -4092,8 +4139,7 @@ static inline void skb_frag_list_init(struct sk_buff *skb) int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, int *err, long *timeo_p, const struct sk_buff *skb); -struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, - struct sk_buff_head *queue, +struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); @@ -4116,9 +4162,8 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); -int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, - struct iov_iter *to, int len, - struct ahash_request *hash); +int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); @@ -4133,6 +4178,8 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); +int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb, + int offset, int len, int flags); int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); @@ -4172,17 +4219,9 @@ static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } -struct skb_checksum_ops { - __wsum (*update)(const void *mem, int len, __wsum wsum); - __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); -}; - -extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly; - -__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, - __wsum csum, const struct skb_checksum_ops *ops); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); +u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc); static inline void * __must_check __skb_header_pointer(const struct sk_buff *skb, int offset, int len, @@ -4535,7 +4574,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void skb_tx_timestamp(struct sk_buff *skb) { skb_clone_tx_timestamp(skb); - if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) + if (skb_shinfo(skb)->tx_flags & (SKBTX_SW_TSTAMP | SKBTX_BPF)) skb_tstamp_tx(skb, NULL); } @@ -4612,7 +4651,7 @@ static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) /* Check if we need to perform checksum complete validation. * - * Returns true if checksum complete is needed, false otherwise + * Returns: true if checksum complete is needed, false otherwise * (either checksum is unnecessary or zero checksum is allowed). */ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 0f3c58007488..9e49372ef1a0 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -17,7 +17,7 @@ */ static inline void __skb_frag_ref(skb_frag_t *frag) { - get_page(skb_frag_page(frag)); + get_netmem(skb_frag_netmem(frag)); } /** @@ -40,7 +40,7 @@ static inline void skb_page_unref(netmem_ref netmem, bool recycle) if (recycle && napi_pp_put_page(netmem)) return; #endif - put_page(netmem_to_page(netmem)); + put_netmem(netmem); } /** diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 2cbe0c22a32f..0b9095a281b8 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -91,6 +91,8 @@ struct sk_psock { struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) struct strparser strp; + u32 copied_seq; + u32 ingress_bytes; #endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; diff --git a/include/linux/slab.h b/include/linux/slab.h index 10a971c2bde3..d5a8ab98035c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/overflow.h> #include <linux/types.h> +#include <linux/rcupdate.h> #include <linux/workqueue.h> #include <linux/percpu-refcount.h> #include <linux/cleanup.h> @@ -136,6 +137,15 @@ enum _slab_flag_bits { * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * + * Note that object identity check has to be done *after* acquiring a + * reference, therefore user has to ensure proper ordering for loads. + * Similarly, when initializing objects allocated with SLAB_TYPESAFE_BY_RCU, + * the newly allocated object has to be fully initialized *before* its + * refcount gets initialized and proper ordering for stores is required. + * refcount_{add|inc}_not_zero_acquire() and refcount_set_release() are + * designed with the proper fences required for reference counting objects + * allocated with SLAB_TYPESAFE_BY_RCU. + * * Note that it is not possible to acquire a lock within a structure * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages @@ -235,12 +245,6 @@ enum _slab_flag_bits { #endif /* - * freeptr_t represents a SLUB freelist pointer, which might be encoded - * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled. - */ -typedef struct { unsigned long v; } freeptr_t; - -/* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. * * Dereferencing ZERO_SIZE_PTR will lead to a distinct access fault. @@ -941,8 +945,6 @@ static inline __alloc_size(1, 2) void *kmalloc_array_noprof(size_t n, size_t siz if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (__builtin_constant_p(n) && __builtin_constant_p(size)) - return kmalloc_noprof(bytes, flags); return kmalloc_noprof(bytes, flags); } #define kmalloc_array(...) alloc_hooks(kmalloc_array_noprof(__VA_ARGS__)) @@ -1082,6 +1084,19 @@ extern void kvfree_sensitive(const void *addr, size_t len); unsigned int kmem_cache_size(struct kmem_cache *s); +#ifndef CONFIG_KVFREE_RCU_BATCHED +static inline void kvfree_rcu_barrier(void) +{ + rcu_barrier(); +} + +static inline void kfree_rcu_scheduler_running(void) { } +#else +void kvfree_rcu_barrier(void); + +void kfree_rcu_scheduler_running(void); +#endif + /** * kmalloc_size_roundup - Report allocation bucket size for the given size * @@ -1099,5 +1114,6 @@ unsigned int kmem_cache_size(struct kmem_cache *s); size_t kmalloc_size_roundup(size_t size); void __init kmem_cache_init_late(void); +void __init kvfree_rcu_init(void); #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/sm501.h b/include/linux/sm501.h index 2f3488b2875d..bcda27a46e7a 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -12,9 +12,6 @@ extern int sm501_unit_power(struct device *dev, extern unsigned long sm501_set_clock(struct device *dev, int clksrc, unsigned long freq); -extern unsigned long sm501_find_clock(struct device *dev, - int clksrc, unsigned long req_freq); - /* sm501_misc_control * * Modify the SM501's MISC_CONTROL register diff --git a/include/linux/soc/amd/isp4_misc.h b/include/linux/soc/amd/isp4_misc.h new file mode 100644 index 000000000000..6738796986a7 --- /dev/null +++ b/include/linux/soc/amd/isp4_misc.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. + */ + +#ifndef __SOC_ISP4_MISC_H +#define __SOC_ISP4_MISC_H + +#define AMDISP_I2C_ADAP_NAME "AMDISP DesignWare I2C adapter" + +#endif diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index c06d17599ae7..736f53018017 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -56,7 +56,7 @@ struct apple_rtkit_shmem { * context. */ struct apple_rtkit_ops { - void (*crashed)(void *cookie); + void (*crashed)(void *cookie, const void *crashlog, size_t crashlog_size); void (*recv_message)(void *cookie, u8 endpoint, u64 message); bool (*recv_message_early)(void *cookie, u8 endpoint, u64 message); int (*shmem_setup)(void *cookie, struct apple_rtkit_shmem *bfr); diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 5bee6f7fc400..0c3906e8ad19 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -391,14 +391,6 @@ int cmdq_pkt_jump_rel(struct cmdq_pkt *pkt, s32 offset, u8 shift_pa); */ int cmdq_pkt_eoc(struct cmdq_pkt *pkt); -/** - * cmdq_pkt_finalize() - Append EOC and jump command to pkt. - * @pkt: the CMDQ packet - * - * Return: 0 for success; else the error code is returned - */ -int cmdq_pkt_finalize(struct cmdq_pkt *pkt); - #else /* IS_ENABLED(CONFIG_MTK_CMDQ) */ static inline int cmdq_dev_get_client_reg(struct device *dev, @@ -519,11 +511,6 @@ static inline int cmdq_pkt_eoc(struct cmdq_pkt *pkt) return -EINVAL; } -static inline int cmdq_pkt_finalize(struct cmdq_pkt *pkt) -{ - return -EINVAL; -} - #endif /* IS_ENABLED(CONFIG_MTK_CMDQ) */ #endif /* __MTK_CMDQ_H__ */ diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index a476648858a6..d8949a4ed0dc 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -192,7 +192,7 @@ struct mtk_wed_device { }; struct mtk_wed_ops { - int (*attach)(struct mtk_wed_device *dev); + int (*attach)(struct mtk_wed_device *dev) __releases(RCU); int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs, bool reset); int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 8e5d78fb4847..7a69210a250c 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -24,6 +24,7 @@ #define LLCC_CMPTDMA 15 #define LLCC_DISP 16 #define LLCC_VIDFW 17 +#define LLCC_CAMFW 18 #define LLCC_MDMHPFX 20 #define LLCC_MDMPNG 21 #define LLCC_AUDHW 22 @@ -67,6 +68,13 @@ #define LLCC_EVCS_LEFT 67 #define LLCC_EVCS_RIGHT 68 #define LLCC_SPAD 69 +#define LLCC_VIDDEC 70 +#define LLCC_CAMOFE 71 +#define LLCC_CAMRTIP 72 +#define LLCC_CAMSRTIP 73 +#define LLCC_CAMRTRF 74 +#define LLCC_CAMSRTRF 75 +#define LLCC_CPUSSMPAM 89 /** * struct llcc_slice_desc - Cache slice descriptor diff --git a/include/linux/soc/qcom/qmi.h b/include/linux/soc/qcom/qmi.h index 469e02d2aa0d..291cdc7ef49c 100644 --- a/include/linux/soc/qcom/qmi.h +++ b/include/linux/soc/qcom/qmi.h @@ -24,9 +24,9 @@ struct socket; */ struct qmi_header { u8 type; - u16 txn_id; - u16 msg_id; - u16 msg_len; + __le16 txn_id; + __le16 msg_id; + __le16 msg_len; } __packed; #define QMI_REQUEST 0 diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index ce1a3790d6fb..1a2c0e0838f9 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -55,6 +55,8 @@ #define EXYNOS4_MIPI_PHY_SRESETN (1 << 1) #define EXYNOS4_MIPI_PHY_MRESETN (1 << 2) #define EXYNOS4_MIPI_PHY_RESET_MASK (3 << 1) +/* USB PHY enable bit, valid for Exynos7870 */ +#define EXYNOS7870_USB2PHY_ENABLE (1 << 1) #define S5P_INFORM0 0x0800 #define S5P_INFORM1 0x0804 @@ -185,6 +187,9 @@ /* Only for S5Pv210 */ #define S5PV210_EINT_WAKEUP_MASK 0xC004 +/* Only for Exynos2200 */ +#define EXYNOS2200_PHY_CTRL_USB20 0x72C + /* Only for Exynos4210 */ #define S5P_CMU_CLKSTOP_LCD1_LOWPWR 0x1154 #define S5P_CMU_RESET_LCD1_LOWPWR 0x1174 @@ -658,9 +663,20 @@ #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) /* For Tensor GS101 */ +/* PMU ALIVE */ #define GS101_SYSIP_DAT0 (0x810) +#define GS101_CPU0_INFORM (0x860) +#define GS101_CPU_INFORM(cpu) \ + (GS101_CPU0_INFORM + (cpu*4)) #define GS101_SYSTEM_CONFIGURATION (0x3A00) #define GS101_PHY_CTRL_USB20 (0x3EB0) #define GS101_PHY_CTRL_USBDP (0x3EB4) +/* PMU INTR GEN */ +#define GS101_GRP1_INTR_BID_UPEND (0x0108) +#define GS101_GRP1_INTR_BID_CLEAR (0x010c) +#define GS101_GRP2_INTR_BID_ENABLE (0x0200) +#define GS101_GRP2_INTR_BID_UPEND (0x0208) +#define GS101_GRP2_INTR_BID_CLEAR (0x020c) + #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h index c3322eb3d686..3b262487ec06 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -static inline size_t msg_data_left(struct msghdr *msg) +static inline size_t msg_data_left(const struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } diff --git a/include/linux/sony-laptop.h b/include/linux/sony-laptop.h deleted file mode 100644 index 1e3c92feea6e..000000000000 --- a/include/linux/sony-laptop.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SONYLAPTOP_H_ -#define _SONYLAPTOP_H_ - -#include <linux/types.h> - -#ifdef __KERNEL__ - -/* used only for communication between v4l and sony-laptop */ - -#define SONY_PIC_COMMAND_GETCAMERA 1 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERA 2 -#define SONY_PIC_COMMAND_GETCAMERABRIGHTNESS 3 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERABRIGHTNESS 4 -#define SONY_PIC_COMMAND_GETCAMERACONTRAST 5 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERACONTRAST 6 -#define SONY_PIC_COMMAND_GETCAMERAHUE 7 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERAHUE 8 -#define SONY_PIC_COMMAND_GETCAMERACOLOR 9 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERACOLOR 10 -#define SONY_PIC_COMMAND_GETCAMERASHARPNESS 11 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERASHARPNESS 12 -#define SONY_PIC_COMMAND_GETCAMERAPICTURE 13 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERAPICTURE 14 -#define SONY_PIC_COMMAND_GETCAMERAAGC 15 /* obsolete */ -#define SONY_PIC_COMMAND_SETCAMERAAGC 16 -#define SONY_PIC_COMMAND_GETCAMERADIRECTION 17 /* obsolete */ -#define SONY_PIC_COMMAND_GETCAMERAROMVERSION 18 /* obsolete */ -#define SONY_PIC_COMMAND_GETCAMERAREVISION 19 /* obsolete */ - -#if IS_ENABLED(CONFIG_SONY_LAPTOP) -int sony_pic_camera_command(int command, u8 value); -#else -static inline int sony_pic_camera_command(int command, u8 value) { return 0; } -#endif - -#endif /* __KERNEL__ */ - -#endif /* _SONYLAPTOP_H_ */ diff --git a/include/linux/sort.h b/include/linux/sort.h index e163287ac6c1..c01ef804a0eb 100644 --- a/include/linux/sort.h +++ b/include/linux/sort.h @@ -4,6 +4,16 @@ #include <linux/types.h> +/** + * cmp_int - perform a three-way comparison of the arguments + * @l: the left argument + * @r: the right argument + * + * Return: 1 if the left argument is greater than the right one; 0 if the + * arguments are equal; -1 if the left argument is less than the right one. + */ +#define cmp_int(l, r) (((l) > (r)) - ((l) < (r))) + void sort_r(void *base, size_t num, size_t size, cmp_r_func_t cmp_func, swap_r_func_t swap_func, @@ -13,4 +23,15 @@ void sort(void *base, size_t num, size_t size, cmp_func_t cmp_func, swap_func_t swap_func); +/* Versions that periodically call cond_resched(): */ + +void sort_r_nonatomic(void *base, size_t num, size_t size, + cmp_r_func_t cmp_func, + swap_r_func_t swap_func, + const void *priv); + +void sort_nonatomic(void *base, size_t num, size_t size, + cmp_func_t cmp_func, + swap_func_t swap_func); + #endif diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index bd9836690da6..0832776262ac 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -8,6 +8,7 @@ #include <linux/bug.h> #include <linux/completion.h> #include <linux/device.h> +#include <linux/idr.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/lockdep_types.h> @@ -50,10 +51,13 @@ struct sdw_slave; #define SDW_FRAME_CTRL_BITS 48 #define SDW_MAX_DEVICES 11 +#define SDW_FW_MAX_DEVICES 16 #define SDW_MAX_PORTS 15 #define SDW_VALID_PORT_RANGE(n) ((n) < SDW_MAX_PORTS && (n) >= 1) +#define SDW_MAX_LANES 8 + enum { SDW_PORT_DIRN_SINK = 0, SDW_PORT_DIRN_SOURCE, @@ -148,12 +152,14 @@ enum sdw_dpn_pkg_mode { * * @SDW_STREAM_PCM: PCM data stream * @SDW_STREAM_PDM: PDM data stream + * @SDW_STREAM_BPT: BPT data stream * * spec doesn't define this, but is used in implementation */ enum sdw_stream_type { SDW_STREAM_PCM = 0, SDW_STREAM_PDM = 1, + SDW_STREAM_BPT = 2, }; /** @@ -356,6 +362,7 @@ struct sdw_dpn_prop { * and masks are supported * @commit_register_supported: is PCP_Commit register supported * @scp_int1_mask: SCP_INT1_MASK desired settings + * @lane_maps: Lane mapping for the slave, only valid if lane_control_support is set * @clock_reg_supported: the Peripheral implements the clock base and scale * registers introduced with the SoundWire 1.2 specification. SDCA devices * do not need to set this boolean property as the registers are required. @@ -385,6 +392,7 @@ struct sdw_slave_prop { u32 sdca_interrupt_register_list; u8 commit_register_supported; u8 scp_int1_mask; + u8 lane_maps[SDW_MAX_LANES]; bool clock_reg_supported; bool use_domain_irq; }; @@ -450,6 +458,7 @@ struct sdw_master_prop { int sdw_master_read_prop(struct sdw_bus *bus); int sdw_slave_read_prop(struct sdw_slave *slave); +int sdw_slave_read_lane_mapping(struct sdw_slave *slave); /* * SDW Slave Structures and APIs @@ -623,6 +632,7 @@ struct sdw_slave_ops { * struct sdw_slave - SoundWire Slave * @id: MIPI device ID * @dev: Linux device + * @index: internal ID for this slave * @irq: IRQ number * @status: Status reported by the Slave * @bus: Bus handle @@ -654,6 +664,7 @@ struct sdw_slave_ops { struct sdw_slave { struct sdw_slave_id id; struct device dev; + int index; int irq; enum sdw_slave_status status; struct sdw_bus *bus; @@ -817,6 +828,15 @@ struct sdw_defer { struct completion complete; }; +/* + * Add a practical limit to BPT transfer sizes. BPT is typically used + * to transfer firmware, and larger firmware transfers will increase + * the cold latency beyond typical OS or user requirements. + */ +#define SDW_BPT_MSG_MAX_BYTES (1024 * 1024) + +struct sdw_bpt_msg; + /** * struct sdw_master_ops - Master driver ops * @read_prop: Read Master properties @@ -832,6 +852,10 @@ struct sdw_defer { * @get_device_num: Callback for vendor-specific device_number allocation * @put_device_num: Callback for vendor-specific device_number release * @new_peripheral_assigned: Callback to handle enumeration of new peripheral. + * @bpt_send_async: reserve resources for BPT stream and send message + * using BTP protocol + * @bpt_wait: wait for message completion using BTP protocol + * and release resources */ struct sdw_master_ops { int (*read_prop)(struct sdw_bus *bus); @@ -848,77 +872,9 @@ struct sdw_master_ops { void (*new_peripheral_assigned)(struct sdw_bus *bus, struct sdw_slave *slave, int dev_num); -}; - -/** - * struct sdw_bus - SoundWire bus - * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. - * @md: Master device - * @bus_lock_key: bus lock key associated to @bus_lock - * @bus_lock: bus lock - * @slaves: list of Slaves on this bus - * @msg_lock_key: message lock key associated to @msg_lock - * @msg_lock: message lock - * @m_rt_list: List of Master instance of all stream(s) running on Bus. This - * is used to compute and program bus bandwidth, clock, frame shape, - * transport and port parameters - * @defer_msg: Defer message - * @params: Current bus parameters - * @stream_refcount: number of streams currently using this bus - * @ops: Master callback ops - * @port_ops: Master port callback ops - * @prop: Master properties - * @vendor_specific_prop: pointer to non-standard properties - * @hw_sync_min_links: Number of links used by a stream above which - * hardware-based synchronization is required. This value is only - * meaningful if multi_link is set. If set to 1, hardware-based - * synchronization will be used even if a stream only uses a single - * SoundWire segment. - * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. - * @link_id: Link id number, can be 0 to N, unique for each Controller - * @id: bus system-wide unique id - * @compute_params: points to Bus resource management implementation - * @assigned: Bitmap for Slave device numbers. - * Bit set implies used number, bit clear implies unused number. - * @clk_stop_timeout: Clock stop timeout computed - * @bank_switch_timeout: Bank switch timeout computed - * @domain: IRQ domain - * @irq_chip: IRQ chip - * @debugfs: Bus debugfs (optional) - * @multi_link: Store bus property that indicates if multi links - * are supported. This flag is populated by drivers after reading - * appropriate firmware (ACPI/DT). - */ -struct sdw_bus { - struct device *dev; - struct sdw_master_device *md; - struct lock_class_key bus_lock_key; - struct mutex bus_lock; - struct list_head slaves; - struct lock_class_key msg_lock_key; - struct mutex msg_lock; - struct list_head m_rt_list; - struct sdw_defer defer_msg; - struct sdw_bus_params params; - int stream_refcount; - const struct sdw_master_ops *ops; - const struct sdw_master_port_ops *port_ops; - struct sdw_master_prop prop; - void *vendor_specific_prop; - int hw_sync_min_links; - int controller_id; - unsigned int link_id; - int id; - int (*compute_params)(struct sdw_bus *bus); - DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); - unsigned int clk_stop_timeout; - u32 bank_switch_timeout; - struct irq_chip irq_chip; - struct irq_domain *domain; -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs; -#endif - bool multi_link; + int (*bpt_send_async)(struct sdw_bus *bus, struct sdw_slave *slave, + struct sdw_bpt_msg *msg); + int (*bpt_wait)(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, @@ -945,7 +901,7 @@ struct sdw_port_config { * @ch_count: Channel count of the stream * @bps: Number of bits per audio sample * @direction: Data direction - * @type: Stream type PCM or PDM + * @type: Stream type PCM, PDM or BPT */ struct sdw_stream_config { unsigned int frame_rate; @@ -995,7 +951,7 @@ struct sdw_stream_params { * @name: SoundWire stream name * @params: Stream parameters * @state: Current state of the stream - * @type: Stream type PCM or PDM + * @type: Stream type PCM, PDM or BPT * @m_rt_count: Count of Master runtime(s) in this stream * @master_list: List of Master runtime(s) in this stream. * master_list can contain only one m_rt per Master instance @@ -1010,10 +966,90 @@ struct sdw_stream_runtime { struct list_head master_list; }; -struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name); +/** + * struct sdw_bus - SoundWire bus + * @dev: Shortcut to &bus->md->dev to avoid changing the entire code. + * @md: Master device + * @bus_lock_key: bus lock key associated to @bus_lock + * @bus_lock: bus lock + * @slave_ida: IDA for allocating internal slave IDs + * @slaves: list of Slaves on this bus + * @msg_lock_key: message lock key associated to @msg_lock + * @msg_lock: message lock + * @m_rt_list: List of Master instance of all stream(s) running on Bus. This + * is used to compute and program bus bandwidth, clock, frame shape, + * transport and port parameters + * @defer_msg: Defer message + * @params: Current bus parameters + * @stream_refcount: number of streams currently using this bus + * @btp_stream_refcount: number of BTP streams currently using this bus (should + * be zero or one, multiple streams per link is not supported). + * @bpt_stream: pointer stored to handle BTP streams. + * @ops: Master callback ops + * @port_ops: Master port callback ops + * @prop: Master properties + * @vendor_specific_prop: pointer to non-standard properties + * @hw_sync_min_links: Number of links used by a stream above which + * hardware-based synchronization is required. This value is only + * meaningful if multi_link is set. If set to 1, hardware-based + * synchronization will be used even if a stream only uses a single + * SoundWire segment. + * @controller_id: system-unique controller ID. If set to -1, the bus @id will be used. + * @link_id: Link id number, can be 0 to N, unique for each Controller + * @id: bus system-wide unique id + * @compute_params: points to Bus resource management implementation + * @assigned: Bitmap for Slave device numbers. + * Bit set implies used number, bit clear implies unused number. + * @clk_stop_timeout: Clock stop timeout computed + * @bank_switch_timeout: Bank switch timeout computed + * @domain: IRQ domain + * @irq_chip: IRQ chip + * @debugfs: Bus debugfs (optional) + * @multi_link: Store bus property that indicates if multi links + * are supported. This flag is populated by drivers after reading + * appropriate firmware (ACPI/DT). + * @lane_used_bandwidth: how much bandwidth in bits per second is used by each lane + */ +struct sdw_bus { + struct device *dev; + struct sdw_master_device *md; + struct lock_class_key bus_lock_key; + struct mutex bus_lock; + struct ida slave_ida; + struct list_head slaves; + struct lock_class_key msg_lock_key; + struct mutex msg_lock; + struct list_head m_rt_list; + struct sdw_defer defer_msg; + struct sdw_bus_params params; + int stream_refcount; + int bpt_stream_refcount; + struct sdw_stream_runtime *bpt_stream; + const struct sdw_master_ops *ops; + const struct sdw_master_port_ops *port_ops; + struct sdw_master_prop prop; + void *vendor_specific_prop; + int hw_sync_min_links; + int controller_id; + unsigned int link_id; + int id; + int (*compute_params)(struct sdw_bus *bus, struct sdw_stream_runtime *stream); + DECLARE_BITMAP(assigned, SDW_MAX_DEVICES); + unsigned int clk_stop_timeout; + u32 bank_switch_timeout; + struct irq_chip irq_chip; + struct irq_domain *domain; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs; +#endif + bool multi_link; + unsigned int lane_used_bandwidth[SDW_MAX_LANES]; +}; + +struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name, enum sdw_stream_type type); void sdw_release_stream(struct sdw_stream_runtime *stream); -int sdw_compute_params(struct sdw_bus *bus); +int sdw_compute_params(struct sdw_bus *bus, struct sdw_stream_runtime *stream); int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_stream_config *stream_config, @@ -1034,6 +1070,11 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus); int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); +bool is_clock_scaling_supported_by_slave(struct sdw_slave *slave); + +int sdw_bpt_send_async(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); +int sdw_bpt_wait(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); +int sdw_bpt_send_sync(struct sdw_bus *bus, struct sdw_slave *slave, struct sdw_bpt_msg *msg); #if IS_ENABLED(CONFIG_SOUNDWIRE) @@ -1045,6 +1086,8 @@ int sdw_stream_add_slave(struct sdw_slave *slave, int sdw_stream_remove_slave(struct sdw_slave *slave, struct sdw_stream_runtime *stream); +int sdw_slave_get_scale_index(struct sdw_slave *slave, u8 *base); + /* messaging and data APIs */ int sdw_read(struct sdw_slave *slave, u32 addr); int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); diff --git a/include/linux/soundwire/sdw_amd.h b/include/linux/soundwire/sdw_amd.h index 799f8578137b..6b839987f14c 100644 --- a/include/linux/soundwire/sdw_amd.h +++ b/include/linux/soundwire/sdw_amd.h @@ -28,6 +28,8 @@ #define ACP_SDW1 1 #define AMD_SDW_MAX_MANAGER_COUNT 2 #define ACP63_PCI_REV_ID 0x63 +#define ACP70_PCI_REV_ID 0x70 +#define ACP71_PCI_REV_ID 0x71 struct acp_sdw_pdata { u16 instance; diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 580086417e4b..9c9435009537 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -189,6 +189,9 @@ #define SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2 BIT(14) #define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE BIT(15) +/* ACE3+ Mic privacy control and status register */ +#define SDW_SHIM2_INTEL_VS_PVCCS 0x10 + /** * struct sdw_intel_stream_params_data: configuration passed during * the @params_stream callback, e.g. for interaction with DSP @@ -331,6 +334,7 @@ struct sdw_intel_ctx { * @shim_base: sdw shim base. * @alh_base: sdw alh base. * @ext: extended HDaudio link support + * @mic_privacy: ACE version supports microphone privacy * @hbus: hdac_bus pointer, needed for power management * @eml_lock: mutex protecting shared registers in the HDaudio multi-link * space @@ -349,6 +353,7 @@ struct sdw_intel_res { u32 shim_base; u32 alh_base; bool ext; + bool mic_privacy; struct hdac_bus *hbus; struct mutex *eml_lock; }; @@ -365,7 +370,7 @@ struct sdw_intel_res { * on e.g. which machine driver to select (I2S mode, HDaudio or * SoundWire). */ -int sdw_intel_acpi_scan(acpi_handle *parent_handle, +int sdw_intel_acpi_scan(acpi_handle parent_handle, struct sdw_intel_acpi_info *info); void sdw_intel_process_wakeen_event(struct sdw_intel_ctx *ctx); @@ -436,6 +441,10 @@ struct sdw_intel_hw_ops { bool (*sync_check_cmdsync_unlocked)(struct sdw_intel *sdw); void (*program_sdi)(struct sdw_intel *sdw, int dev_num); + + int (*bpt_send_async)(struct sdw_intel *sdw, struct sdw_slave *slave, + struct sdw_bpt_msg *msg); + int (*bpt_wait)(struct sdw_intel *sdw, struct sdw_slave *slave, struct sdw_bpt_msg *msg); }; extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops; diff --git a/include/linux/soundwire/sdw_registers.h b/include/linux/soundwire/sdw_registers.h index 658b10fa5b20..0a5939285583 100644 --- a/include/linux/soundwire/sdw_registers.h +++ b/include/linux/soundwire/sdw_registers.h @@ -4,6 +4,9 @@ #ifndef __SDW_REGISTERS_H #define __SDW_REGISTERS_H +#include <linux/bitfield.h> +#include <linux/bits.h> + /* * SDW registers as defined by MIPI 1.2 Spec */ @@ -329,16 +332,27 @@ * 2:0 Control Number[2:0] */ -#define SDW_SDCA_CTL(fun, ent, ctl, ch) (BIT(30) | \ - (((fun) & 0x7) << 22) | \ - (((ent) & 0x40) << 15) | \ - (((ent) & 0x3f) << 7) | \ - (((ctl) & 0x30) << 15) | \ - (((ctl) & 0x0f) << 3) | \ - (((ch) & 0x38) << 12) | \ - ((ch) & 0x07)) +#define SDW_SDCA_CTL(fun, ent, ctl, ch) (BIT(30) | \ + (((fun) & GENMASK(2, 0)) << 22) | \ + (((ent) & BIT(6)) << 15) | \ + (((ent) & GENMASK(5, 0)) << 7) | \ + (((ctl) & GENMASK(5, 4)) << 15) | \ + (((ctl) & GENMASK(3, 0)) << 3) | \ + (((ch) & GENMASK(5, 3)) << 12) | \ + ((ch) & GENMASK(2, 0))) + +#define SDW_SDCA_CTL_FUNC(reg) FIELD_GET(GENMASK(24, 22), (reg)) +#define SDW_SDCA_CTL_ENT(reg) ((FIELD_GET(BIT(21), (reg)) << 6) | \ + FIELD_GET(GENMASK(12, 7), (reg))) +#define SDW_SDCA_CTL_CSEL(reg) ((FIELD_GET(GENMASK(20, 19), (reg)) << 4) | \ + FIELD_GET(GENMASK(6, 3), (reg))) +#define SDW_SDCA_CTL_CNUM(reg) ((FIELD_GET(GENMASK(17, 15), (reg)) << 3) | \ + FIELD_GET(GENMASK(2, 0), (reg))) #define SDW_SDCA_MBQ_CTL(reg) ((reg) | BIT(13)) #define SDW_SDCA_NEXT_CTL(reg) ((reg) | BIT(14)) +/* Check the reserved and fixed bits in address */ +#define SDW_SDCA_VALID_CTL(reg) (((reg) & (GENMASK(31, 25) | BIT(18) | BIT(13))) == BIT(30)) + #endif /* __SDW_REGISTERS_H */ diff --git a/include/linux/spi/offload/consumer.h b/include/linux/spi/offload/consumer.h new file mode 100644 index 000000000000..cd7d5daa21e6 --- /dev/null +++ b/include/linux/spi/offload/consumer.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Analog Devices Inc. + * Copyright (C) 2024 BayLibre, SAS + */ + +#ifndef __LINUX_SPI_OFFLOAD_CONSUMER_H +#define __LINUX_SPI_OFFLOAD_CONSUMER_H + +#include <linux/module.h> +#include <linux/spi/offload/types.h> +#include <linux/types.h> + +MODULE_IMPORT_NS("SPI_OFFLOAD"); + +struct device; +struct spi_device; + +struct spi_offload *devm_spi_offload_get(struct device *dev, struct spi_device *spi, + const struct spi_offload_config *config); + +struct spi_offload_trigger +*devm_spi_offload_trigger_get(struct device *dev, + struct spi_offload *offload, + enum spi_offload_trigger_type type); +int spi_offload_trigger_validate(struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); +int spi_offload_trigger_enable(struct spi_offload *offload, + struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); +void spi_offload_trigger_disable(struct spi_offload *offload, + struct spi_offload_trigger *trigger); + +struct dma_chan *devm_spi_offload_tx_stream_request_dma_chan(struct device *dev, + struct spi_offload *offload); +struct dma_chan *devm_spi_offload_rx_stream_request_dma_chan(struct device *dev, + struct spi_offload *offload); + +#endif /* __LINUX_SPI_OFFLOAD_CONSUMER_H */ diff --git a/include/linux/spi/offload/provider.h b/include/linux/spi/offload/provider.h new file mode 100644 index 000000000000..76c7cf651092 --- /dev/null +++ b/include/linux/spi/offload/provider.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Analog Devices Inc. + * Copyright (C) 2024 BayLibre, SAS + */ + +#ifndef __LINUX_SPI_OFFLOAD_PROVIDER_H +#define __LINUX_SPI_OFFLOAD_PROVIDER_H + +#include <linux/module.h> +#include <linux/spi/offload/types.h> +#include <linux/types.h> + +MODULE_IMPORT_NS("SPI_OFFLOAD"); + +struct device; +struct spi_offload_trigger; + +struct spi_offload *devm_spi_offload_alloc(struct device *dev, size_t priv_size); + +struct spi_offload_trigger_ops { + bool (*match)(struct spi_offload_trigger *trigger, + enum spi_offload_trigger_type type, u64 *args, u32 nargs); + int (*request)(struct spi_offload_trigger *trigger, + enum spi_offload_trigger_type type, u64 *args, u32 nargs); + void (*release)(struct spi_offload_trigger *trigger); + int (*validate)(struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); + int (*enable)(struct spi_offload_trigger *trigger, + struct spi_offload_trigger_config *config); + void (*disable)(struct spi_offload_trigger *trigger); +}; + +struct spi_offload_trigger_info { + /** @fwnode: Provider fwnode, used to match to consumer. */ + struct fwnode_handle *fwnode; + /** @ops: Provider-specific callbacks. */ + const struct spi_offload_trigger_ops *ops; + /** Provider-specific state to be used in callbacks. */ + void *priv; +}; + +int devm_spi_offload_trigger_register(struct device *dev, + struct spi_offload_trigger_info *info); +void *spi_offload_trigger_get_priv(struct spi_offload_trigger *trigger); + +#endif /* __LINUX_SPI_OFFLOAD_PROVIDER_H */ diff --git a/include/linux/spi/offload/types.h b/include/linux/spi/offload/types.h new file mode 100644 index 000000000000..6f7892347871 --- /dev/null +++ b/include/linux/spi/offload/types.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Analog Devices Inc. + * Copyright (C) 2024 BayLibre, SAS + */ + +#ifndef __LINUX_SPI_OFFLOAD_TYPES_H +#define __LINUX_SPI_OFFLOAD_TYPES_H + +#include <linux/bits.h> +#include <linux/types.h> + +struct device; + +/* This is write xfer but TX uses external data stream rather than tx_buf. */ +#define SPI_OFFLOAD_XFER_TX_STREAM BIT(0) +/* This is read xfer but RX uses external data stream rather than rx_buf. */ +#define SPI_OFFLOAD_XFER_RX_STREAM BIT(1) + +/* Offload can be triggered by external hardware event. */ +#define SPI_OFFLOAD_CAP_TRIGGER BIT(0) +/* Offload can record and then play back TX data when triggered. */ +#define SPI_OFFLOAD_CAP_TX_STATIC_DATA BIT(1) +/* Offload can get TX data from an external stream source. */ +#define SPI_OFFLOAD_CAP_TX_STREAM_DMA BIT(2) +/* Offload can send RX data to an external stream sink. */ +#define SPI_OFFLOAD_CAP_RX_STREAM_DMA BIT(3) + +/** + * struct spi_offload_config - offload configuration + * + * This is used to request an offload with specific configuration. + */ +struct spi_offload_config { + /** @capability_flags: required capabilities. See %SPI_OFFLOAD_CAP_* */ + u32 capability_flags; +}; + +/** + * struct spi_offload - offload instance + */ +struct spi_offload { + /** @provider_dev: for get/put reference counting */ + struct device *provider_dev; + /** @priv: provider driver private data */ + void *priv; + /** @ops: callbacks for offload support */ + const struct spi_offload_ops *ops; + /** @xfer_flags: %SPI_OFFLOAD_XFER_* flags supported by provider */ + u32 xfer_flags; +}; + +enum spi_offload_trigger_type { + /* Indication from SPI peripheral that data is read to read. */ + SPI_OFFLOAD_TRIGGER_DATA_READY, + /* Trigger comes from a periodic source such as a clock. */ + SPI_OFFLOAD_TRIGGER_PERIODIC, +}; + +struct spi_offload_trigger_periodic { + u64 frequency_hz; +}; + +struct spi_offload_trigger_config { + /** @type: type discriminator for union */ + enum spi_offload_trigger_type type; + union { + struct spi_offload_trigger_periodic periodic; + }; +}; + +/** + * struct spi_offload_ops - callbacks implemented by offload providers + */ +struct spi_offload_ops { + /** + * @trigger_enable: Optional callback to enable the trigger for the + * given offload instance. + */ + int (*trigger_enable)(struct spi_offload *offload); + /** + * @trigger_disable: Optional callback to disable the trigger for the + * given offload instance. + */ + void (*trigger_disable)(struct spi_offload *offload); + /** + * @tx_stream_request_dma_chan: Optional callback for controllers that + * have an offload where the TX data stream is connected directly to a + * DMA channel. + */ + struct dma_chan *(*tx_stream_request_dma_chan)(struct spi_offload *offload); + /** + * @rx_stream_request_dma_chan: Optional callback for controllers that + * have an offload where the RX data stream is connected directly to a + * DMA channel. + */ + struct dma_chan *(*rx_stream_request_dma_chan)(struct spi_offload *offload); +}; + +#endif /* __LINUX_SPI_OFFLOAD_TYPES_H */ diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index f950d280461b..9fbef3fd4056 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -2,6 +2,131 @@ #ifndef __SPI_SH_MSIOF_H__ #define __SPI_SH_MSIOF_H__ +#include <linux/bitfield.h> +#include <linux/bits.h> + +#define SITMDR1 0x00 /* Transmit Mode Register 1 */ +#define SITMDR2 0x04 /* Transmit Mode Register 2 */ +#define SITMDR3 0x08 /* Transmit Mode Register 3 */ +#define SIRMDR1 0x10 /* Receive Mode Register 1 */ +#define SIRMDR2 0x14 /* Receive Mode Register 2 */ +#define SIRMDR3 0x18 /* Receive Mode Register 3 */ +#define SITSCR 0x20 /* Transmit Clock Select Register */ +#define SIRSCR 0x22 /* Receive Clock Select Register (SH, A1, APE6) */ +#define SICTR 0x28 /* Control Register */ +#define SIFCTR 0x30 /* FIFO Control Register */ +#define SISTR 0x40 /* Status Register */ +#define SIIER 0x44 /* Interrupt Enable Register */ +#define SITDR1 0x48 /* Transmit Control Data Register 1 (SH, A1) */ +#define SITDR2 0x4c /* Transmit Control Data Register 2 (SH, A1) */ +#define SITFDR 0x50 /* Transmit FIFO Data Register */ +#define SIRDR1 0x58 /* Receive Control Data Register 1 (SH, A1) */ +#define SIRDR2 0x5c /* Receive Control Data Register 2 (SH, A1) */ +#define SIRFDR 0x60 /* Receive FIFO Data Register */ + +/* SITMDR1 and SIRMDR1 */ +#define SIMDR1_TRMD BIT(31) /* Transfer Mode (1 = Master mode) */ +#define SIMDR1_SYNCMD GENMASK(29, 28) /* SYNC Mode */ +#define SIMDR1_SYNCMD_PULSE 0U /* Frame start sync pulse */ +#define SIMDR1_SYNCMD_SPI 2U /* Level mode/SPI */ +#define SIMDR1_SYNCMD_LR 3U /* L/R mode */ +#define SIMDR1_SYNCAC BIT(25) /* Sync Polarity (1 = Active-low) */ +#define SIMDR1_BITLSB BIT(24) /* MSB/LSB First (1 = LSB first) */ +#define SIMDR1_DTDL GENMASK(22, 20) /* Data Pin Bit Delay for MSIOF_SYNC */ +#define SIMDR1_SYNCDL GENMASK(18, 16) /* Frame Sync Signal Timing Delay */ +#define SIMDR1_FLD GENMASK(3, 2) /* Frame Sync Signal Interval (0-3) */ +#define SIMDR1_XXSTP BIT(0) /* Transmission/Reception Stop on FIFO */ +/* SITMDR1 */ +#define SITMDR1_PCON BIT(30) /* Transfer Signal Connection */ +#define SITMDR1_SYNCCH GENMASK(27, 26) /* Sync Signal Channel Select */ + /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */ + +/* SITMDR2 and SIRMDR2 */ +#define SIMDR2_GRP GENMASK(31, 30) /* Group Count */ +#define SIMDR2_BITLEN1 GENMASK(28, 24) /* Data Size (8-32 bits) */ +#define SIMDR2_WDLEN1 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */ +#define SIMDR2_GRPMASK GENMASK(3, 0) /* Group Output Mask 1-4 (SH, A1) */ + +/* SITMDR3 and SIRMDR3 */ +#define SIMDR3_BITLEN2 GENMASK(28, 24) /* Data Size (8-32 bits) */ +#define SIMDR3_WDLEN2 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */ + +/* SITSCR and SIRSCR */ +#define SISCR_BRPS GENMASK(12, 8) /* Prescaler Setting (1-32) */ +#define SISCR_BRDV GENMASK(2, 0) /* Baud Rate Generator's Division Ratio */ + +/* SICTR */ +#define SICTR_TSCKIZ GENMASK(31, 30) /* Transmit Clock I/O Polarity Select */ +#define SICTR_TSCKIZ_SCK BIT(31) /* Disable SCK when TX disabled */ +#define SICTR_TSCKIZ_POL BIT(30) /* Transmit Clock Polarity */ +#define SICTR_RSCKIZ GENMASK(29, 28) /* Receive Clock Polarity Select */ +#define SICTR_RSCKIZ_SCK BIT(29) /* Must match CTR_TSCKIZ_SCK */ +#define SICTR_RSCKIZ_POL BIT(28) /* Receive Clock Polarity */ +#define SICTR_TEDG BIT(27) /* Transmit Timing (1 = falling edge) */ +#define SICTR_REDG BIT(26) /* Receive Timing (1 = falling edge) */ +#define SICTR_TXDIZ GENMASK(23, 22) /* Pin Output When TX is Disabled */ +#define SICTR_TXDIZ_LOW 0U /* 0 */ +#define SICTR_TXDIZ_HIGH 1U /* 1 */ +#define SICTR_TXDIZ_HIZ 2U /* High-impedance */ +#define SICTR_TSCKE BIT(15) /* Transmit Serial Clock Output Enable */ +#define SICTR_TFSE BIT(14) /* Transmit Frame Sync Signal Output Enable */ +#define SICTR_TXE BIT(9) /* Transmit Enable */ +#define SICTR_RXE BIT(8) /* Receive Enable */ +#define SICTR_TXRST BIT(1) /* Transmit Reset */ +#define SICTR_RXRST BIT(0) /* Receive Reset */ + +/* SIFCTR */ +#define SIFCTR_TFWM GENMASK(31, 29) /* Transmit FIFO Watermark */ +#define SIFCTR_TFWM_64 0U /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 1U /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 2U /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 3U /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 4U /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 5U /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 6U /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 7U /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFUA GENMASK(28, 20) /* Transmit FIFO Usable Area */ +#define SIFCTR_RFWM GENMASK(15, 13) /* Receive FIFO Watermark */ +#define SIFCTR_RFWM_1 0U /* Transfer Request when 1 valid stages */ +#define SIFCTR_RFWM_4 1U /* Transfer Request when 4 valid stages */ +#define SIFCTR_RFWM_8 2U /* Transfer Request when 8 valid stages */ +#define SIFCTR_RFWM_16 3U /* Transfer Request when 16 valid stages */ +#define SIFCTR_RFWM_32 4U /* Transfer Request when 32 valid stages */ +#define SIFCTR_RFWM_64 5U /* Transfer Request when 64 valid stages */ +#define SIFCTR_RFWM_128 6U /* Transfer Request when 128 valid stages */ +#define SIFCTR_RFWM_256 7U /* Transfer Request when 256 valid stages */ +#define SIFCTR_RFUA GENMASK(12, 4) /* Receive FIFO Usable Area (0x40 = full) */ + +/* SISTR */ +#define SISTR_TFEMP BIT(29) /* Transmit FIFO Empty */ +#define SISTR_TDREQ BIT(28) /* Transmit Data Transfer Request */ +#define SISTR_TEOF BIT(23) /* Frame Transmission End */ +#define SISTR_TFSERR BIT(21) /* Transmit Frame Synchronization Error */ +#define SISTR_TFOVF BIT(20) /* Transmit FIFO Overflow */ +#define SISTR_TFUDF BIT(19) /* Transmit FIFO Underflow */ +#define SISTR_RFFUL BIT(13) /* Receive FIFO Full */ +#define SISTR_RDREQ BIT(12) /* Receive Data Transfer Request */ +#define SISTR_REOF BIT(7) /* Frame Reception End */ +#define SISTR_RFSERR BIT(5) /* Receive Frame Synchronization Error */ +#define SISTR_RFUDF BIT(4) /* Receive FIFO Underflow */ +#define SISTR_RFOVF BIT(3) /* Receive FIFO Overflow */ + +/* SIIER */ +#define SIIER_TDMAE BIT(31) /* Transmit Data DMA Transfer Req. Enable */ +#define SIIER_TFEMPE BIT(29) /* Transmit FIFO Empty Enable */ +#define SIIER_TDREQE BIT(28) /* Transmit Data Transfer Request Enable */ +#define SIIER_TEOFE BIT(23) /* Frame Transmission End Enable */ +#define SIIER_TFSERRE BIT(21) /* Transmit Frame Sync Error Enable */ +#define SIIER_TFOVFE BIT(20) /* Transmit FIFO Overflow Enable */ +#define SIIER_TFUDFE BIT(19) /* Transmit FIFO Underflow Enable */ +#define SIIER_RDMAE BIT(15) /* Receive Data DMA Transfer Req. Enable */ +#define SIIER_RFFULE BIT(13) /* Receive FIFO Full Enable */ +#define SIIER_RDREQE BIT(12) /* Receive Data Transfer Request Enable */ +#define SIIER_REOFE BIT(7) /* Frame Reception End Enable */ +#define SIIER_RFSERRE BIT(5) /* Receive Frame Sync Error Enable */ +#define SIIER_RFUDFE BIT(4) /* Receive FIFO Underflow Enable */ +#define SIIER_RFOVFE BIT(3) /* Receive FIFO Overflow Enable */ + enum { MSIOF_SPI_HOST, MSIOF_SPI_TARGET, diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h index c46d2b8029be..c4830dfaff3d 100644 --- a/include/linux/spi/spi-mem.h +++ b/include/linux/spi/spi-mem.h @@ -15,16 +15,32 @@ #define SPI_MEM_OP_CMD(__opcode, __buswidth) \ { \ + .nbytes = 1, \ .buswidth = __buswidth, \ .opcode = __opcode, \ + } + +#define SPI_MEM_DTR_OP_CMD(__opcode, __buswidth) \ + { \ .nbytes = 1, \ + .opcode = __opcode, \ + .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth) \ { \ .nbytes = __nbytes, \ + .buswidth = __buswidth, \ + .val = __val, \ + } + +#define SPI_MEM_DTR_OP_ADDR(__nbytes, __val, __buswidth) \ + { \ + .nbytes = __nbytes, \ .val = __val, \ .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_NO_ADDR { } @@ -35,22 +51,47 @@ .buswidth = __buswidth, \ } +#define SPI_MEM_DTR_OP_DUMMY(__nbytes, __buswidth) \ + { \ + .nbytes = __nbytes, \ + .buswidth = __buswidth, \ + .dtr = true, \ + } + #define SPI_MEM_OP_NO_DUMMY { } #define SPI_MEM_OP_DATA_IN(__nbytes, __buf, __buswidth) \ { \ + .buswidth = __buswidth, \ + .dir = SPI_MEM_DATA_IN, \ + .nbytes = __nbytes, \ + .buf.in = __buf, \ + } + +#define SPI_MEM_DTR_OP_DATA_IN(__nbytes, __buf, __buswidth) \ + { \ .dir = SPI_MEM_DATA_IN, \ .nbytes = __nbytes, \ .buf.in = __buf, \ .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_DATA_OUT(__nbytes, __buf, __buswidth) \ { \ + .buswidth = __buswidth, \ + .dir = SPI_MEM_DATA_OUT, \ + .nbytes = __nbytes, \ + .buf.out = __buf, \ + } + +#define SPI_MEM_DTR_OP_DATA_OUT(__nbytes, __buf, __buswidth) \ + { \ .dir = SPI_MEM_DATA_OUT, \ .nbytes = __nbytes, \ .buf.out = __buf, \ .buswidth = __buswidth, \ + .dtr = true, \ } #define SPI_MEM_OP_NO_DATA { } @@ -68,6 +109,9 @@ enum spi_mem_data_dir { SPI_MEM_DATA_OUT, }; +#define SPI_MEM_OP_MAX_FREQ(__freq) \ + .max_freq = __freq + /** * struct spi_mem_op - describes a SPI memory operation * @cmd.nbytes: number of opcode bytes (only 1 or 2 are valid). The opcode is @@ -97,6 +141,9 @@ enum spi_mem_data_dir { * operation does not involve transferring data * @data.buf.in: input buffer (must be DMA-able) * @data.buf.out: output buffer (must be DMA-able) + * @max_freq: frequency limitation wrt this operation. 0 means there is no + * specific constraint and the highest achievable frequency can be + * attempted. */ struct spi_mem_op { struct { @@ -135,14 +182,17 @@ struct spi_mem_op { const void *out; } buf; } data; + + unsigned int max_freq; }; -#define SPI_MEM_OP(__cmd, __addr, __dummy, __data) \ +#define SPI_MEM_OP(__cmd, __addr, __dummy, __data, ...) \ { \ .cmd = __cmd, \ .addr = __addr, \ .dummy = __dummy, \ .data = __data, \ + __VA_ARGS__ \ } /** @@ -302,11 +352,13 @@ struct spi_controller_mem_ops { * @ecc: Supports operations with error correction * @swap16: Supports swapping bytes on a 16 bit boundary when configured in * Octal DTR + * @per_op_freq: Supports per operation frequency switching */ struct spi_controller_mem_caps { bool dtr; bool ecc; bool swap16; + bool per_op_freq; }; #define spi_mem_controller_is_capable(ctlr, cap) \ @@ -371,6 +423,8 @@ bool spi_mem_default_supports_op(struct spi_mem *mem, #endif /* CONFIG_SPI_MEM */ int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op); +void spi_mem_adjust_op_freq(struct spi_mem *mem, struct spi_mem_op *op); +u64 spi_mem_calc_op_duration(struct spi_mem_op *op); bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 8497f4747e24..e9ea43234d9a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -21,7 +21,7 @@ #include <uapi/linux/spi/spi.h> /* Max no. of CS supported per spi device */ -#define SPI_CS_CNT_MAX 16 +#define SPI_CS_CNT_MAX 24 struct dma_chan; struct software_node; @@ -31,9 +31,11 @@ struct spi_transfer; struct spi_controller_mem_ops; struct spi_controller_mem_caps; struct spi_message; +struct spi_offload; +struct spi_offload_config; /* - * INTERFACES between SPI master-side drivers and SPI slave protocol handlers, + * INTERFACES between SPI controller-side drivers and SPI target protocol handlers, * and SPI infrastructure. */ extern const struct bus_type spi_bus_type; @@ -128,19 +130,12 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, struct spi_transfer *xfer); /** - * struct spi_device - Controller side proxy for an SPI slave device + * struct spi_device - Controller side proxy for an SPI target device * @dev: Driver model representation of the device. * @controller: SPI controller used with the device. * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. - * @chip_select: Array of physical chipselect, spi->chipselect[i] gives - * the corresponding physical CS for logical CS i. - * @mode: The spi mode defines how data is clocked out and in. - * This may be changed by the device's driver. - * The "active low" default for chipselect mode can be overridden - * (by specifying SPI_CS_HIGH) as can the "MSB first" default for - * each word in a transfer (by specifying SPI_LSB_FIRST). * @bits_per_word: Data transfers involve one or more words; word sizes * like eight or 12 bits are common. In-memory wordsizes are * powers of two bytes (e.g. 20 bit samples use 32 bits). @@ -148,6 +143,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * default (0) indicating protocol words are eight bit bytes. * The spi_transfer.bits_per_word can override this for each transfer. * @rt: Make the pump thread real time priority. + * @mode: The spi mode defines how data is clocked out and in. + * This may be changed by the device's driver. + * The "active low" default for chipselect mode can be overridden + * (by specifying SPI_CS_HIGH) as can the "MSB first" default for + * each word in a transfer (by specifying SPI_LSB_FIRST). * @irq: Negative, or the number passed to request_irq() to receive * interrupts from this device. * @controller_state: Controller's runtime state @@ -160,8 +160,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. - * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines - * (optional, NULL when not using a GPIO line) + * @pcpu_statistics: statistics for the spi_device * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted @@ -169,10 +168,13 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. - * @pcpu_statistics: statistics for the spi_device + * @chip_select: Array of physical chipselect, spi->chipselect[i] gives + * the corresponding physical CS for logical CS i. * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array + * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines + * (optional, NULL when not using a GPIO line) * - * A @spi_device is used to interchange data between an SPI slave + * A @spi_device is used to interchange data between an SPI target device * (usually a discrete chip) and CPU memory. * * In @dev, the platform_data is used to hold information about this @@ -185,7 +187,6 @@ struct spi_device { struct device dev; struct spi_controller *controller; u32 max_speed_hz; - u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ @@ -216,23 +217,29 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + + /* The statistics */ + struct spi_statistics __percpu *pcpu_statistics; + struct spi_delay word_delay; /* Inter-word delay */ + /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; - /* The statistics */ - struct spi_statistics __percpu *pcpu_statistics; + u8 chip_select[SPI_CS_CNT_MAX]; - /* Bit mask of the chipselect(s) that the driver need to use from - * the chipselect array.When the controller is capable to handle + /* + * Bit mask of the chipselect(s) that the driver need to use from + * the chipselect array. When the controller is capable to handle * multiple chip selects & memories are connected in parallel * then more than one bit need to be set in cs_index_mask. */ u32 cs_index_mask : SPI_CS_CNT_MAX; + struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + /* * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: @@ -247,10 +254,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(const struct device *dev) -{ - return dev ? container_of(dev, struct spi_device, dev) : NULL; -} +#define to_spi_device(__dev) container_of_const(__dev, struct spi_device, dev) /* Most drivers won't need to care about device refcounting */ static inline struct spi_device *spi_dev_get(struct spi_device *spi) @@ -386,15 +390,15 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch spi_unregister_driver) /** - * struct spi_controller - interface to SPI master or slave controller + * struct spi_controller - interface to SPI host or target controller * @dev: device interface to this driver * @list: link with the global spi_controller list * @bus_num: board-specific (and often SOC-specific) identifier for a * given SPI controller. * @num_chipselect: chipselects are used to distinguish individual - * SPI slaves, and are numbered from zero to num_chipselects. - * each slave has a chipselect signal, but it's common that not - * every chipselect is connected to a slave. + * SPI targets, and are numbered from zero to num_chipselects. + * each target has a chipselect signal, but it's common that not + * every chipselect is connected to a target. * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @buswidth_override_bits: flags to override for this controller driver @@ -423,9 +427,9 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * must fail if an unrecognized or unsupported mode is requested. * It's always safe to call this unless transfers are pending on * the device whose settings are being modified. - * @set_cs_timing: optional hook for SPI devices to request SPI master + * @set_cs_timing: optional hook for SPI devices to request SPI * controller for configuring specific CS setup time, hold time and inactive - * delay interms of clock counts + * delay in terms of clock counts * @transfer: adds a message to the controller's transfer queue. * @cleanup: frees controller-specific state * @can_dma: determine whether this controller supports DMA @@ -496,7 +500,13 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @mem_ops: optimized/dedicated operations for interactions with SPI memory. * This field is optional and should only be implemented if the * controller has native support for memory like operations. + * @get_offload: callback for controllers with offload support to get matching + * offload instance. Implementations should return -ENODEV if no match is + * found. + * @put_offload: release the offload instance acquired by @get_offload. * @mem_caps: controller capabilities for the handling of memory operations. + * @dtr_caps: true if controller has dtr(single/dual transfer rate) capability. + * QSPI based controller should fill this based on controller's capability. * @unprepare_message: undo any work done by prepare_message(). * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS @@ -541,7 +551,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * * The driver for an SPI controller manages access to those devices through * a queue of spi_message transactions, copying data between CPU memory and - * an SPI slave device. For each such message it queues, it calls the + * an SPI target device. For each such message it queues, it calls the * message's completion function when the transaction completes. */ struct spi_controller { @@ -591,7 +601,7 @@ struct spi_controller { #define SPI_CONTROLLER_NO_TX BIT(2) /* Can't do buffer write */ #define SPI_CONTROLLER_MUST_RX BIT(3) /* Requires rx */ #define SPI_CONTROLLER_MUST_TX BIT(4) /* Requires tx */ -#define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select slave */ +#define SPI_CONTROLLER_GPIO_SS BIT(5) /* GPIO CS must select target device */ #define SPI_CONTROLLER_SUSPENDED BIT(6) /* Currently suspended */ /* * The spi-controller has multi chip select capability and can @@ -658,7 +668,7 @@ struct spi_controller { * + To a given spi_device, message queueing is pure FIFO * * + The controller's main job is to process its message queue, - * selecting a chip (for masters), then transferring data + * selecting a chip (for controllers), then transferring data * + If there are multiple spi_device children, the i/o queue * arbitration algorithm is unspecified (round robin, FIFO, * priority, reservations, preemption, etc) @@ -740,6 +750,13 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; + /* SPI or QSPI controller can set to true if supports SDR/DDR transfer rate */ + bool dtr_caps; + + struct spi_offload *(*get_offload)(struct spi_device *spi, + const struct spi_offload_config *config); + void (*put_offload)(struct spi_offload *offload); + /* GPIO chip select */ struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; @@ -822,7 +839,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr, /* The SPI driver core manages memory for the spi_controller classdev */ extern struct spi_controller *__spi_alloc_controller(struct device *host, - unsigned int size, bool slave); + unsigned int size, bool target); static inline struct spi_controller *spi_alloc_host(struct device *dev, unsigned int size) @@ -841,7 +858,7 @@ static inline struct spi_controller *spi_alloc_target(struct device *dev, struct spi_controller *__devm_spi_alloc_controller(struct device *dev, unsigned int size, - bool slave); + bool target); static inline struct spi_controller *devm_spi_alloc_host(struct device *dev, unsigned int size) @@ -963,6 +980,8 @@ struct spi_res { * @rx_sg_mapped: If true, the @rx_sg is mapped for DMA * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use + * @offload_flags: Flags that are only applicable to specialized SPI offload + * transfers. See %SPI_OFFLOAD_XFER_* in spi-offload.h. * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset * within @tx_buf for which the SPI device is requesting that the time * snapshot for this transfer begins. Upon completing the SPI transfer, @@ -977,15 +996,16 @@ struct spi_res { * purposefully (instead of setting to spi_transfer->len - 1) to denote * that a transfer-level snapshot taken from within the driver may still * be of higher quality. - * @ptp_sts: Pointer to a memory location held by the SPI slave device where a + * @ptp_sts: Pointer to a memory location held by the SPI target device where a * PTP system timestamp structure may lie. If drivers use PIO or their * hardware has some sort of assist for retrieving exact transfer timing, * they can (and should) assert @ptp_sts_supported and populate this * structure using the ptp_read_system_*ts helper functions. - * The timestamp must represent the time at which the SPI slave device has + * The timestamp must represent the time at which the SPI target device has * processed the word, i.e. the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. + * @dtr_mode: true if supports double transfer rate. * @timestamped: true if the transfer has been timestamped * @error: Error status logged by SPI controller driver. * @@ -1037,6 +1057,9 @@ struct spi_res { * two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x) * SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer. * + * User may also set dtr_mode to true to use dual transfer mode if desired. if + * not, default considered as single transfer mode. + * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to @@ -1071,6 +1094,7 @@ struct spi_transfer { unsigned tx_nbits:4; unsigned rx_nbits:4; unsigned timestamped:1; + bool dtr_mode; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ @@ -1083,6 +1107,9 @@ struct spi_transfer { u32 effective_speed_hz; + /* Use %SPI_OFFLOAD_XFER_* from spi-offload.h */ + unsigned int offload_flags; + unsigned int ptp_sts_word_pre; unsigned int ptp_sts_word_post; @@ -1108,6 +1135,7 @@ struct spi_transfer { * @state: for use by whichever driver currently owns the message * @opt_state: for use by whichever driver currently owns the message * @resources: for resource management when the SPI message is processed + * @offload: (optional) offload instance used by this message * * A @spi_message is used to execute an atomic sequence of data transfers, * each represented by a struct spi_transfer. The sequence is "atomic" @@ -1168,6 +1196,12 @@ struct spi_message { */ void *opt_state; + /* + * Optional offload instance used by this message. This must be set + * by the peripheral driver before calling spi_optimize_message(). + */ + struct spi_offload *offload; + /* List of spi_res resources when the SPI message is processed */ struct list_head resources; }; @@ -1304,6 +1338,32 @@ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) } /** + * spi_bpw_to_bytes - Covert bits per word to bytes + * @bpw: Bits per word + * + * This function converts the given @bpw to bytes. The result is always + * power-of-two, e.g., + * + * =============== ================= + * Input (in bits) Output (in bytes) + * =============== ================= + * 5 1 + * 9 2 + * 21 4 + * 37 8 + * =============== ================= + * + * It will return 0 for the 0 input. + * + * Returns: + * Bytes for the given @bpw. + */ +static inline u32 spi_bpw_to_bytes(u32 bpw) +{ + return roundup_pow_of_two(BITS_TO_BYTES(bpw)); +} + +/** * spi_controller_xfer_timeout - Compute a suitable timeout value * @ctlr: SPI device * @xfer: Transfer descriptor @@ -1600,7 +1660,7 @@ struct spi_board_info { * bus_num is board specific and matches the bus_num of some * spi_controller that will probably be registered later. * - * chip_select reflects how this chip is wired to that master; + * chip_select reflects how this chip is wired to that controller; * it's less than num_chipselect. */ u16 bus_num; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 63dd8cf3c3c2..d3561c4a080e 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -548,6 +548,12 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1(raw_spinlock_bh, raw_spinlock_t, + raw_spin_lock_bh(_T->lock), + raw_spin_unlock_bh(_T->lock)) + +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_bh, _try, raw_spin_trylock_bh(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), @@ -569,6 +575,13 @@ DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, spin_trylock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1(spinlock_bh, spinlock_t, + spin_lock_bh(_T->lock), + spin_unlock_bh(_T->lock)) + +DEFINE_LOCK_GUARD_1_COND(spinlock_bh, _try, + spin_trylock_bh(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), diff --git a/include/linux/sprintf.h b/include/linux/sprintf.h index 33dcbec71925..876130091384 100644 --- a/include/linux/sprintf.h +++ b/include/linux/sprintf.h @@ -4,6 +4,7 @@ #include <linux/compiler_attributes.h> #include <linux/types.h> +#include <linux/stdarg.h> int num_to_str(char *buf, int size, unsigned long long num, unsigned int width); @@ -24,4 +25,7 @@ __scanf(2, 0) int vsscanf(const char *, const char *, va_list); extern bool no_hash_pointers; int no_hash_pointers_enable(char *str); +/* Used for Rust formatting ('%pA') */ +char *rust_fmt_argument(char *buf, char *end, const void *ptr); + #endif /* _LINUX_KERNEL_SPRINTF_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 08339eb8a01c..900b0d5c05f5 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -43,6 +43,18 @@ int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Values for SRCU Tree srcu_data ->srcu_reader_flavor, but also used by rcutorture. */ +#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). +#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). +#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). +#define SRCU_READ_FLAVOR_FAST 0x8 // srcu_read_lock_fast(). +#define SRCU_READ_FLAVOR_ALL (SRCU_READ_FLAVOR_NORMAL | SRCU_READ_FLAVOR_NMI | \ + SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) // All of the above. +#define SRCU_READ_FLAVOR_SLOWGP (SRCU_READ_FLAVOR_LITE | SRCU_READ_FLAVOR_FAST) + // Flavors requiring synchronize_rcu() + // instead of smp_mb(). +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); + #ifdef CONFIG_TINY_SRCU #include <linux/srcutiny.h> #elif defined(CONFIG_TREE_SRCU) @@ -54,15 +66,6 @@ int init_srcu_struct(struct srcu_struct *ssp); void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); void cleanup_srcu_struct(struct srcu_struct *ssp); -int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); -#ifdef CONFIG_TINY_SRCU -#define __srcu_read_lock_lite __srcu_read_lock -#define __srcu_read_unlock_lite __srcu_read_unlock -#else // #ifdef CONFIG_TINY_SRCU -int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp); -#endif // #else // #ifdef CONFIG_TINY_SRCU void synchronize_srcu(struct srcu_struct *ssp); #define SRCU_GET_STATE_COMPLETED 0x1 @@ -232,13 +235,14 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * - * The return value from srcu_read_lock() must be passed unaltered - * to the matching srcu_read_unlock(). Note that srcu_read_lock() and - * the matching srcu_read_unlock() must occur in the same context, for - * example, it is illegal to invoke srcu_read_unlock() in an irq handler - * if the matching srcu_read_lock() was invoked in process context. Or, - * for that matter to invoke srcu_read_unlock() from one task and the - * matching srcu_read_lock() from another. + * The return value from srcu_read_lock() is guaranteed to be + * non-negative. This value must be passed unaltered to the matching + * srcu_read_unlock(). Note that srcu_read_lock() and the matching + * srcu_read_unlock() must occur in the same context, for example, it is + * illegal to invoke srcu_read_unlock() in an irq handler if the matching + * srcu_read_lock() was invoked in process context. Or, for that matter to + * invoke srcu_read_unlock() from one task and the matching srcu_read_lock() + * from another. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { @@ -251,6 +255,51 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) } /** + * srcu_read_lock_fast - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but for a light-weight + * smp_mb()-free reader. See srcu_read_lock() for more information. + * + * If srcu_read_lock_fast() is ever used on an srcu_struct structure, + * then none of the other flavors may be used, whether before, during, + * or after. Note that grace-period auto-expediting is disabled for _fast + * srcu_struct structures because auto-expedited grace periods invoke + * synchronize_rcu_expedited(), IPIs and all. + * + * Note that srcu_read_lock_fast() can be invoked only from those contexts + * where RCU is watching, that is, from contexts where it would be legal + * to invoke rcu_read_lock(). Otherwise, lockdep will complain. + */ +static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *ssp) __acquires(ssp) +{ + struct srcu_ctr __percpu *retval; + + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + retval = __srcu_read_lock_fast(ssp); + rcu_try_lock_acquire(&ssp->dep_map); + return retval; +} + +/** + * srcu_down_read_fast - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter a semaphore-like SRCU read-side critical section, but for + * a light-weight smp_mb()-free reader. See srcu_read_lock_fast() and + * srcu_down_read() for more information. + * + * The same srcu_struct may be used concurrently by srcu_down_read_fast() + * and srcu_read_lock_fast(). + */ +static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp) +{ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST); + return __srcu_read_lock_fast(ssp); +} + +/** * srcu_read_lock_lite - register a new reader for an SRCU-protected structure. * @ssp: srcu_struct in which to register the new reader. * @@ -271,7 +320,7 @@ static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp) { int retval; - srcu_check_read_flavor_lite(ssp); + srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_LITE); retval = __srcu_read_lock_lite(ssp); rcu_try_lock_acquire(&ssp->dep_map); return retval; @@ -328,7 +377,8 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) * srcu_down_read() nor srcu_up_read() may be invoked from an NMI handler. * * Calls to srcu_down_read() may be nested, similar to the manner in - * which calls to down_read() may be nested. + * which calls to down_read() may be nested. The same srcu_struct may be + * used concurrently by srcu_down_read() and srcu_read_lock(). */ static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp) { @@ -354,9 +404,40 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) } /** + * srcu_read_unlock_fast - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @scp: return value from corresponding srcu_read_lock_fast(). + * + * Exit a light-weight SRCU read-side critical section. + */ +static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases(ssp) +{ + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + srcu_lock_release(&ssp->dep_map); + __srcu_read_unlock_fast(ssp, scp); +} + +/** + * srcu_up_read_fast - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @scp: return value from corresponding srcu_read_lock_fast(). + * + * Exit an SRCU read-side critical section, but not necessarily from + * the same context as the maching srcu_down_read_fast(). + */ +static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) + __releases(ssp) +{ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi()); + srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST); + __srcu_read_unlock_fast(ssp, scp); +} + +/** * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). + * @idx: return value from corresponding srcu_read_lock_lite(). * * Exit a light-weight SRCU read-side critical section. */ @@ -372,7 +453,7 @@ static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) /** * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. * @ssp: srcu_struct in which to unregister the old reader. - * @idx: return value from corresponding srcu_read_lock(). + * @idx: return value from corresponding srcu_read_lock_nmisafe(). * * Exit an SRCU read-side critical section, but in an NMI-safe manner. */ diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 1321da803274..380260317d98 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -64,13 +64,38 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - preempt_disable(); // Needed for PREEMPT_AUTO + preempt_disable(); // Needed for PREEMPT_LAZY idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], READ_ONCE(ssp->srcu_lock_nesting[idx]) + 1); preempt_enable(); return idx; } +struct srcu_ctr; + +static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp) +{ + return (int)(intptr_t)(struct srcu_ctr __force __kernel *)scpp; +} + +static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx) +{ + return (struct srcu_ctr __percpu *)(intptr_t)idx; +} + +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +{ + return __srcu_ctr_to_ptr(ssp, __srcu_read_lock(ssp)); +} + +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +{ + __srcu_read_unlock(ssp, __srcu_ptr_to_ctr(ssp, scp)); +} + +#define __srcu_read_lock_lite __srcu_read_lock +#define __srcu_read_unlock_lite __srcu_read_unlock + static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { synchronize_srcu(ssp); @@ -82,7 +107,7 @@ static inline void srcu_barrier(struct srcu_struct *ssp) } #define srcu_check_read_flavor(ssp, read_flavor) do { } while (0) -#define srcu_check_read_flavor_lite(ssp) do { } while (0) +#define srcu_check_read_flavor_force(ssp, read_flavor) do { } while (0) /* Defined here to avoid size increase for non-torture kernels. */ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 490aeecc6bb4..8bed7e6cc4c1 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -17,15 +17,21 @@ struct srcu_node; struct srcu_struct; +/* One element of the srcu_data srcu_ctrs array. */ +struct srcu_ctr { + atomic_long_t srcu_locks; /* Locks per CPU. */ + atomic_long_t srcu_unlocks; /* Unlocks per CPU. */ +}; + /* * Per-CPU structure feeding into leaf srcu_node, similar in function * to rcu_node. */ struct srcu_data { /* Read-side state. */ - atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ - atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ + struct srcu_ctr srcu_ctrs[2]; /* Locks and unlocks per CPU. */ int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ + /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; @@ -43,11 +49,6 @@ struct srcu_data { struct srcu_struct *ssp; }; -/* Values for ->srcu_reader_flavor. */ -#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). -#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). -#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). - /* * Node in SRCU combining tree, similar in function to rcu_data. */ @@ -99,7 +100,7 @@ struct srcu_usage { * Per-SRCU-domain structure, similar in function to rcu_state. */ struct srcu_struct { - unsigned int srcu_idx; /* Current rdr array element. */ + struct srcu_ctr __percpu *srcu_ctrp; struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ struct lockdep_map dep_map; struct srcu_usage *srcu_sup; /* Update-side data. */ @@ -166,6 +167,7 @@ struct srcu_struct { #define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name) \ { \ .sda = &pcpu_name, \ + .srcu_ctrp = &pcpu_name.srcu_ctrs[0], \ __SRCU_STRUCT_INIT_COMMON(name, usage_name) \ } @@ -205,10 +207,77 @@ struct srcu_struct { #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) +int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void synchronize_srcu_expedited(struct srcu_struct *ssp); void srcu_barrier(struct srcu_struct *ssp); void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); +// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that +// element's index. +static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp) +{ + return scpp - &ssp->sda->srcu_ctrs[0]; +} + +// Converts an integer to a per-CPU pointer to the corresponding +// ->srcu_ctrs[] array element. +static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx) +{ + return &ssp->sda->srcu_ctrs[idx]; +} + +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Returns a pointer that must be passed to the matching + * srcu_read_unlock_fast(). + * + * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side + * critical sections either because they disables interrupts, because they + * are a single instruction, or because they are a read-modify-write atomic + * operation, depending on the whims of the architecture. + * + * This means that __srcu_read_lock_fast() is not all that fast + * on architectures that support NMIs but do not supply NMI-safe + * implementations of this_cpu_inc(). + */ +static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp) +{ + struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); + + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast()."); + if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + this_cpu_inc(scp->srcu_locks.counter); /* Y */ + else + atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */ + barrier(); /* Avoid leaking the critical section. */ + return scp; +} + +/* + * Removes the count for the old reader from the appropriate + * per-CPU element of the srcu_struct. Note that this may well be a + * different CPU than that which was incremented by the corresponding + * srcu_read_lock_fast(), but it must be within the same task. + * + * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side + * critical sections either because they disables interrupts, because they + * are a single instruction, or because they are a read-modify-write atomic + * operation, depending on the whims of the architecture. + * + * This means that __srcu_read_unlock_fast() is not all that fast + * on architectures that support NMIs but do not supply NMI-safe + * implementations of this_cpu_inc(). + */ +static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp) +{ + barrier(); /* Avoid leaking the critical section. */ + if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + this_cpu_inc(scp->srcu_unlocks.counter); /* Z */ + else + atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */ + RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast()."); +} + /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. Returns an index that must be passed to the matching @@ -221,13 +290,12 @@ void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); */ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) { - int idx; + struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite()."); - idx = READ_ONCE(ssp->srcu_idx) & 0x1; - this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */ + this_cpu_inc(scp->srcu_locks.counter); /* Y */ barrier(); /* Avoid leaking the critical section. */ - return idx; + return __srcu_ptr_to_ctr(ssp, scp); } /* @@ -244,22 +312,24 @@ static inline int __srcu_read_lock_lite(struct srcu_struct *ssp) static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) { barrier(); /* Avoid leaking the critical section. */ - this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */ + this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); /* Z */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite()."); } void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor); -// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels. -static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) +// Record reader usage even for CONFIG_PROVE_RCU=n kernels. This is +// needed only for flavors that require grace-period smp_mb() calls to be +// promoted to synchronize_rcu(). +static inline void srcu_check_read_flavor_force(struct srcu_struct *ssp, int read_flavor) { struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); - if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) + if (likely(READ_ONCE(sdp->srcu_reader_flavor) & read_flavor)) return; - // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered. - __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); + // Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered. + __srcu_check_read_flavor(ssp, read_flavor); } // Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels. diff --git a/include/linux/stat.h b/include/linux/stat.h index 3d900c86981c..e3d00e7bb26d 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -50,12 +50,14 @@ struct kstat { struct timespec64 btime; /* File creation time */ u64 blocks; u64 mnt_id; - u32 dio_mem_align; - u32 dio_offset_align; u64 change_cookie; u64 subvol; + u32 dio_mem_align; + u32 dio_offset_align; + u32 dio_read_offset_align; u32 atomic_write_unit_min; u32 atomic_write_unit_max; + u32 atomic_write_unit_max_opt; u32 atomic_write_segments_max; }; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d79ff252cfdc..26ddf95d23f9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -33,7 +33,9 @@ #define STMMAC_CSR_20_35M 0x2 /* MDC = clk_scr_i/16 */ #define STMMAC_CSR_35_60M 0x3 /* MDC = clk_scr_i/26 */ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ -#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/122 */ +#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/124 */ +#define STMMAC_CSR_300_500M 0x6 /* MDC = clk_scr_i/204 */ +#define STMMAC_CSR_500_800M 0x7 /* MDC = clk_scr_i/324 */ /* MTL algorithms identifiers */ #define MTL_TX_ALGORITHM_WRR 0x0 @@ -76,6 +78,7 @@ | DMA_AXI_BLEN_32 | DMA_AXI_BLEN_64 \ | DMA_AXI_BLEN_128 | DMA_AXI_BLEN_256) +struct clk; struct stmmac_priv; /* Platfrom data for platform device structure's platform_data field */ @@ -180,7 +183,8 @@ struct dwmac4_addrs { #define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9) #define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10) #define STMMAC_FLAG_EN_TX_LPI_CLOCKGATING BIT(11) -#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(12) +#define STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP BIT(12) +#define STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY BIT(13) struct plat_stmmacenet_data { int bus_id; @@ -229,11 +233,18 @@ struct plat_stmmacenet_data { u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; - void (*fix_mac_speed)(void *priv, unsigned int speed, unsigned int mode); + void (*get_interfaces)(struct stmmac_priv *priv, void *bsp_priv, + unsigned long *interfaces); + int (*set_clk_tx_rate)(void *priv, struct clk *clk_tx_i, + phy_interface_t interface, int speed); + void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); - void (*speed_mode_2500)(struct net_device *ndev, void *priv); + int (*mac_finish)(struct net_device *ndev, + void *priv, + unsigned int mode, + phy_interface_t interface); void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); @@ -250,8 +261,11 @@ struct plat_stmmacenet_data { struct clk *stmmac_clk; struct clk *pclk; struct clk *clk_ptp_ref; - unsigned int clk_ptp_rate; - unsigned int clk_ref_rate; + struct clk *clk_tx_i; /* clk_tx_i to MAC core */ + unsigned long clk_ptp_rate; + unsigned long clk_ref_rate; + struct clk_bulk_data *clks; + int num_clks; unsigned int mult_fact_100ns; s32 ptp_max_adj; u32 cdc_error_adj; @@ -263,7 +277,6 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; int has_xgmac; u8 vlan_fail_q; - unsigned int eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; int msi_mac_vec; diff --git a/include/linux/string.h b/include/linux/string.h index 493ac4862c77..01621ad0f598 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -4,6 +4,7 @@ #include <linux/args.h> #include <linux/array_size.h> +#include <linux/cleanup.h> /* for DEFINE_FREE() */ #include <linux/compiler.h> /* for inline */ #include <linux/types.h> /* for size_t */ #include <linux/stddef.h> /* for NULL */ @@ -312,6 +313,8 @@ extern void *kmemdup_array(const void *src, size_t count, size_t element_size, g extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); +DEFINE_FREE(argv_free, char **, if (!IS_ERR_OR_NULL(_T)) argv_free(_T)) + /* lib/cmdline.c */ extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); @@ -333,8 +336,8 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); #define sysfs_match_string(_a, _s) __sysfs_match_string(_a, ARRAY_SIZE(_a), _s) #ifdef CONFIG_BINARY_PRINTF -int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); -int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); +__printf(3, 0) int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); +__printf(3, 0) int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, @@ -411,8 +414,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem_pad(dest, src, pad) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_noncstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_cstr(src) + \ + __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ @@ -434,8 +440,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_noncstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_cstr(src) + \ + __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ _dest_len == (size_t)-1); \ @@ -453,8 +462,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_cstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_noncstr(src) + \ + __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ \ @@ -478,8 +490,11 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr_pad(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ - const size_t _src_len = __builtin_object_size(src, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + __must_be_cstr(dest) + \ + ARRAY_SIZE(dest); \ + const size_t _src_len = __must_be_noncstr(src) + \ + __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ \ diff --git a/include/linux/string_choices.h b/include/linux/string_choices.h index 120ca0f28e95..f3ba4f52ff26 100644 --- a/include/linux/string_choices.h +++ b/include/linux/string_choices.h @@ -41,23 +41,23 @@ static inline const char *str_high_low(bool v) } #define str_low_high(v) str_high_low(!(v)) -static inline const char *str_read_write(bool v) -{ - return v ? "read" : "write"; -} -#define str_write_read(v) str_read_write(!(v)) - static inline const char *str_on_off(bool v) { return v ? "on" : "off"; } #define str_off_on(v) str_on_off(!(v)) -static inline const char *str_yes_no(bool v) +static inline const char *str_read_write(bool v) { - return v ? "yes" : "no"; + return v ? "read" : "write"; } -#define str_no_yes(v) str_yes_no(!(v)) +#define str_write_read(v) str_read_write(!(v)) + +static inline const char *str_true_false(bool v) +{ + return v ? "true" : "false"; +} +#define str_false_true(v) str_true_false(!(v)) static inline const char *str_up_down(bool v) { @@ -65,11 +65,11 @@ static inline const char *str_up_down(bool v) } #define str_down_up(v) str_up_down(!(v)) -static inline const char *str_true_false(bool v) +static inline const char *str_yes_no(bool v) { - return v ? "true" : "false"; + return v ? "yes" : "no"; } -#define str_false_true(v) str_true_false(!(v)) +#define str_no_yes(v) str_yes_no(!(v)) /** * str_plural - Return the simple pluralization based on English counts diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index e93fbb5b0c01..3fb88a1e9898 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -31,6 +31,7 @@ enum string_size_units { int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len); +int parse_int_array(const char *buf, size_t count, int **array); int parse_int_array_user(const char __user *from, size_t count, int **array); #define UNESCAPE_SPACE BIT(0) diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 35766963dd14..e783132e481f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -222,6 +222,8 @@ static inline bool cache_is_expired(struct cache_detail *detail, struct cache_he return detail->flush_time >= h->last_refresh; } +extern int cache_check_rcu(struct cache_detail *detail, + struct cache_head *h, struct cache_req *rqstp); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 5321585c778f..f8b406b0a1af 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -64,7 +64,9 @@ struct rpc_clnt { cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ cl_chatty : 1,/* be verbose */ - cl_shutdown : 1;/* rpc immediate -EIO */ + cl_shutdown : 1,/* rpc immediate -EIO */ + cl_netunreach_fatal : 1; + /* Treat ENETUNREACH errors as fatal */ struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ @@ -93,6 +95,7 @@ struct rpc_clnt { const struct cred *cl_cred; unsigned int cl_max_connect; /* max number of transports not to the same IP */ struct super_block *pipefs_sb; + atomic_t cl_task_count; }; /* @@ -174,6 +177,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) #define RPC_CLNT_CREATE_CONNECTED (1UL << 12) +#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h deleted file mode 100644 index 3ccecd0ad229..000000000000 --- a/include/linux/sunrpc/gss_asn1.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * linux/include/linux/sunrpc/gss_asn1.h - * - * minimal asn1 for generic encoding/decoding of gss tokens - * - * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, - * lib/gssapi/krb5/gssapiP_krb5.h, and others - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - */ - -/* - * Copyright 1995 by the Massachusetts Institute of Technology. - * All Rights Reserved. - * - * Export of this software from the United States of America may - * require a specific license from the United States Government. - * It is the responsibility of any person or organization contemplating - * export to obtain such a license before exporting. - * - * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and - * distribute this software and its documentation for any purpose and - * without fee is hereby granted, provided that the above copyright - * notice appear in all copies and that both that copyright notice and - * this permission notice appear in supporting documentation, and that - * the name of M.I.T. not be used in advertising or publicity pertaining - * to distribution of the software without specific, written prior - * permission. Furthermore if you modify this software you must label - * your software as modified software and not distribute it in such a - * fashion that it might be confused with the original M.I.T. software. - * M.I.T. makes no representations about the suitability of - * this software for any purpose. It is provided "as is" without express - * or implied warranty. - * - */ - - -#include <linux/sunrpc/gss_api.h> - -#define SIZEOF_INT 4 - -/* from gssapi_err_generic.h */ -#define G_BAD_SERVICE_NAME (-2045022976L) -#define G_BAD_STRING_UID (-2045022975L) -#define G_NOUSER (-2045022974L) -#define G_VALIDATE_FAILED (-2045022973L) -#define G_BUFFER_ALLOC (-2045022972L) -#define G_BAD_MSG_CTX (-2045022971L) -#define G_WRONG_SIZE (-2045022970L) -#define G_BAD_USAGE (-2045022969L) -#define G_UNKNOWN_QOP (-2045022968L) -#define G_NO_HOSTNAME (-2045022967L) -#define G_BAD_HOSTNAME (-2045022966L) -#define G_WRONG_MECH (-2045022965L) -#define G_BAD_TOK_HEADER (-2045022964L) -#define G_BAD_DIRECTION (-2045022963L) -#define G_TOK_TRUNC (-2045022962L) -#define G_REFLECT (-2045022961L) -#define G_WRONG_TOKID (-2045022960L) - -#define g_OID_equal(o1,o2) \ - (((o1)->len == (o2)->len) && \ - (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0)) - -u32 g_verify_token_header( - struct xdr_netobj *mech, - int *body_size, - unsigned char **buf_in, - int toksize); - -int g_token_size( - struct xdr_netobj *mech, - unsigned int body_size); - -void g_make_token_header( - struct xdr_netobj *mech, - int body_size, - unsigned char **buf); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 78a80bf3fdcb..43950b5237c8 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -40,7 +40,6 @@ #include <crypto/skcipher.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_err.h> -#include <linux/sunrpc/gss_asn1.h> /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index eac57914dcf3..ccba79ebf893 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -134,6 +134,7 @@ struct rpc_task_setup { #define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ +#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e68fecf6eab5..48666b83fe68 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -72,16 +72,12 @@ struct svc_serv { spinlock_t sv_lock; unsigned int sv_nprogs; /* Number of sv_programs */ unsigned int sv_nrthreads; /* # of server threads */ - unsigned int sv_maxconn; /* max connections allowed or - * '0' causing max to be based - * on number of threads. */ - unsigned int sv_max_payload; /* datagram payload size */ unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_tempsocks; /* all temporary sockets */ - int sv_tmpcnt; /* count of temporary sockets */ + int sv_tmpcnt; /* count of temporary "valid" sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ char * sv_name; /* service name */ @@ -123,14 +119,14 @@ void svc_destroy(struct svc_serv **svcp); * Linux limit; someone who cares more about NFS/UDP performance * can test a larger number. * - * For TCP transports we have more freedom. A size of 1MB is - * chosen to match the client limit. Other OSes are known to - * have larger limits, but those numbers are probably beyond - * the point of diminishing returns. + * For non-UDP transports we have more freedom. A size of 4MB is + * chosen to accommodate clients that support larger I/O sizes. */ -#define RPCSVC_MAXPAYLOAD (1*1024*1024u) -#define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD -#define RPCSVC_MAXPAYLOAD_UDP (32*1024u) +enum { + RPCSVC_MAXPAYLOAD = 4 * 1024 * 1024, + RPCSVC_MAXPAYLOAD_TCP = RPCSVC_MAXPAYLOAD, + RPCSVC_MAXPAYLOAD_UDP = 32 * 1024, +}; extern u32 svc_max_payload(const struct svc_rqst *rqstp); @@ -154,14 +150,24 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * list. xdr_buf.tail points to the end of the first page. * This assumes that the non-page part of an rpc reply will fit * in a page - NFSd ensures this. lockd also has no trouble. + */ + +/** + * svc_serv_maxpages - maximum count of pages needed for one RPC message + * @serv: RPC service context + * + * Returns a count of pages or vectors that can hold the maximum + * size RPC message for @serv. * - * Each request/reply pair can have at most one "payload", plus two pages, - * one for the request, and one for the reply. - * We using ->sendfile to return read data, we might need one extra page - * if the request is not page-aligned. So add another '1'. + * Each request/reply pair can have at most one "payload", plus two + * pages, one for the request, and one for the reply. + * nfsd_splice_actor() might need an extra page when a READ payload + * is not page-aligned. */ -#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ - + 2 + 1) +static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv) +{ + return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1; +} /* * The context of a single thread, including the request currently being @@ -192,14 +198,14 @@ struct svc_rqst { struct xdr_stream rq_res_stream; struct page *rq_scratch_page; struct xdr_buf rq_res; - struct page *rq_pages[RPCSVC_MAXPAGES + 1]; + unsigned long rq_maxpages; /* num of entries in rq_pages */ + struct page * *rq_pages; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ struct folio_batch rq_fbatch; - struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ - struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; + struct bio_vec *rq_bvec; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ @@ -327,12 +333,7 @@ static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) */ static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err) { - rqstp->rq_err = err; - /* memory barrier ensures assignment to error above is visible before - * waitqueue_active() test below completes. - */ - smp_mb(); - wake_up_var(&rqstp->rq_err); + store_release_wake_up(&rqstp->rq_err, err); if (err) kthread_exit(1); } @@ -461,8 +462,6 @@ const char * svc_proc_name(const struct svc_rqst *rqstp); int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); -unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct xdr_buf *payload); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 619fc0bd837a..22704c2e5b9b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -202,7 +202,8 @@ struct svc_rdma_recv_ctxt { struct svc_rdma_pcl rc_reply_pcl; unsigned int rc_page_count; - struct page *rc_pages[RPCSVC_MAXPAGES]; + unsigned long rc_maxpages; + struct page *rc_pages[] __counted_by(rc_maxpages); }; /* @@ -244,7 +245,8 @@ struct svc_rdma_send_ctxt { void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; - struct page *sc_pages[RPCSVC_MAXPAGES]; + unsigned long sc_maxpages; + struct page **sc_pages; struct ib_sge sc_sges[]; }; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 0981e35a9fed..369a89aea186 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -53,6 +53,7 @@ struct svc_xprt { struct svc_xprt_class *xpt_class; const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; + ktime_t xpt_qtime; struct list_head xpt_list; struct lwq_node xpt_ready; unsigned long xpt_flags; @@ -99,8 +100,30 @@ enum { XPT_HANDSHAKE, /* xprt requests a handshake */ XPT_TLS_SESSION, /* transport-layer security established */ XPT_PEER_AUTH, /* peer has been authenticated */ + XPT_PEER_VALID, /* peer has presented a filehandle that + * it has access to. It is NOT counted + * in ->sv_tmpcnt. + */ }; +/* + * Maximum number of "tmp" connections - those without XPT_PEER_VALID - + * permitted on any service. + */ +#define XPT_MAX_TMP_CONN 64 + +static inline void svc_xprt_set_valid(struct svc_xprt *xpt) +{ + if (test_bit(XPT_TEMP, &xpt->xpt_flags) && + !test_and_set_bit(XPT_PEER_VALID, &xpt->xpt_flags)) { + struct svc_serv *serv = xpt->xpt_server; + + spin_lock(&serv->sv_lock); + serv->sv_tmpcnt -= 1; + spin_unlock(&serv->sv_lock); + } +} + static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) { spin_lock(&xpt->xpt_lock); diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index bf45d9e8492a..963bbe251e52 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -40,7 +40,9 @@ struct svc_sock { struct completion sk_handshake_done; - struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ + /* received data */ + unsigned long sk_maxpages; + struct page * sk_pages[] __counted_by(sk_maxpages); }; static inline u32 svc_sock_reclen(struct svc_sock *svsk) diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h new file mode 100644 index 000000000000..cf21a14aa885 --- /dev/null +++ b/include/linux/sunrpc/xdrgen/nfs4_1.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. */ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */ +/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */ + +#ifndef _LINUX_XDRGEN_NFS4_1_DEF_H +#define _LINUX_XDRGEN_NFS4_1_DEF_H + +#include <linux/types.h> +#include <linux/sunrpc/xdrgen/_defs.h> + +typedef s64 int64_t; + +typedef u32 uint32_t; + +typedef struct { + u32 count; + uint32_t *element; +} bitmap4; + +struct nfstime4 { + int64_t seconds; + uint32_t nseconds; +}; + +typedef bool fattr4_offline; + +enum { FATTR4_OFFLINE = 83 }; + +struct open_arguments4 { + bitmap4 oa_share_access; + bitmap4 oa_share_deny; + bitmap4 oa_share_access_want; + bitmap4 oa_open_claim; + bitmap4 oa_create_mode; +}; + +enum open_args_share_access4 { + OPEN_ARGS_SHARE_ACCESS_READ = 1, + OPEN_ARGS_SHARE_ACCESS_WRITE = 2, + OPEN_ARGS_SHARE_ACCESS_BOTH = 3, +}; +typedef enum open_args_share_access4 open_args_share_access4; + +enum open_args_share_deny4 { + OPEN_ARGS_SHARE_DENY_NONE = 0, + OPEN_ARGS_SHARE_DENY_READ = 1, + OPEN_ARGS_SHARE_DENY_WRITE = 2, + OPEN_ARGS_SHARE_DENY_BOTH = 3, +}; +typedef enum open_args_share_deny4 open_args_share_deny4; + +enum open_args_share_access_want4 { + OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG = 3, + OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG = 4, + OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL = 5, + OPEN_ARGS_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 17, + OPEN_ARGS_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 18, + OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 20, + OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 21, +}; +typedef enum open_args_share_access_want4 open_args_share_access_want4; + +enum open_args_open_claim4 { + OPEN_ARGS_OPEN_CLAIM_NULL = 0, + OPEN_ARGS_OPEN_CLAIM_PREVIOUS = 1, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR = 2, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV = 3, + OPEN_ARGS_OPEN_CLAIM_FH = 4, + OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH = 5, + OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH = 6, +}; +typedef enum open_args_open_claim4 open_args_open_claim4; + +enum open_args_createmode4 { + OPEN_ARGS_CREATEMODE_UNCHECKED4 = 0, + OPEN_ARGS_CREATE_MODE_GUARDED = 1, + OPEN_ARGS_CREATEMODE_EXCLUSIVE4 = 2, + OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1 = 3, +}; +typedef enum open_args_createmode4 open_args_createmode4; + +typedef struct open_arguments4 fattr4_open_arguments; + +enum { FATTR4_OPEN_ARGUMENTS = 86 }; + +enum { OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010 }; + +typedef struct nfstime4 fattr4_time_deleg_access; + +typedef struct nfstime4 fattr4_time_deleg_modify; + +enum { FATTR4_TIME_DELEG_ACCESS = 84 }; + +enum { FATTR4_TIME_DELEG_MODIFY = 85 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_MASK = 0xFF00 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE = 0x0000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_READ_DELEG = 0x0100 }; + +enum { OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG = 0x0200 }; + +enum { OPEN4_SHARE_ACCESS_WANT_ANY_DELEG = 0x0300 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_DELEG = 0x0400 }; + +enum { OPEN4_SHARE_ACCESS_WANT_CANCEL = 0x0500 }; + +enum { OPEN4_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 0x10000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 0x20000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000 }; + +enum open_delegation_type4 { + OPEN_DELEGATE_NONE = 0, + OPEN_DELEGATE_READ = 1, + OPEN_DELEGATE_WRITE = 2, + OPEN_DELEGATE_NONE_EXT = 3, + OPEN_DELEGATE_READ_ATTRS_DELEG = 4, + OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5, +}; +typedef enum open_delegation_type4 open_delegation_type4; + +#define NFS4_int64_t_sz \ + (XDR_hyper) +#define NFS4_uint32_t_sz \ + (XDR_unsigned_int) +#define NFS4_bitmap4_sz (XDR_unsigned_int) +#define NFS4_nfstime4_sz \ + (NFS4_int64_t_sz + NFS4_uint32_t_sz) +#define NFS4_fattr4_offline_sz \ + (XDR_bool) +#define NFS4_open_arguments4_sz \ + (NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz) +#define NFS4_open_args_share_access4_sz (XDR_int) +#define NFS4_open_args_share_deny4_sz (XDR_int) +#define NFS4_open_args_share_access_want4_sz (XDR_int) +#define NFS4_open_args_open_claim4_sz (XDR_int) +#define NFS4_open_args_createmode4_sz (XDR_int) +#define NFS4_fattr4_open_arguments_sz \ + (NFS4_open_arguments4_sz) +#define NFS4_fattr4_time_deleg_access_sz \ + (NFS4_nfstime4_sz) +#define NFS4_fattr4_time_deleg_modify_sz \ + (NFS4_nfstime4_sz) +#define NFS4_open_delegation_type4_sz (XDR_int) + +#endif /* _LINUX_XDRGEN_NFS4_1_DEF_H */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 81b952649d35..f46d1fb8f71a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -30,6 +30,8 @@ #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) +#define RPC_GSS_SEQNO_ARRAY_SIZE 3U + enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, @@ -66,7 +68,8 @@ struct rpc_rqst { struct rpc_cred * rq_cred; /* Bound cred */ __be32 rq_xid; /* request XID */ int rq_cong; /* has incremented xprt->cong */ - u32 rq_seqno; /* gss seq no. used on req. */ + u32 rq_seqnos[RPC_GSS_SEQNO_ARRAY_SIZE]; /* past gss req seq nos. */ + unsigned int rq_seqno_count; /* number of entries in rq_seqnos */ int rq_enc_pages_num; struct page **rq_enc_pages; /* scratch pages for use by gss privacy code */ @@ -119,6 +122,18 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len +static inline int xprt_rqst_add_seqno(struct rpc_rqst *req, u32 seqno) +{ + if (likely(req->rq_seqno_count < RPC_GSS_SEQNO_ARRAY_SIZE)) + req->rq_seqno_count++; + + /* Shift array to make room for the newest element at the beginning */ + memmove(&req->rq_seqnos[1], &req->rq_seqnos[0], + (RPC_GSS_SEQNO_ARRAY_SIZE - 1) * sizeof(req->rq_seqnos[0])); + req->rq_seqnos[0] = seqno; + return 0; +} + /* RPC transport layer security policies */ enum xprtsec_policies { RPC_XPRTSEC_NONE = 0, diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index c0514c684b2c..e4db5022fe92 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -56,6 +56,7 @@ extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt); extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, struct rpc_xprt *xprt, bool offline); +extern struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps); extern void xprt_iter_init(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps); @@ -75,7 +76,6 @@ extern struct rpc_xprt_switch *xprt_iter_xchg_switch( struct rpc_xprt_switch *newswitch); extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); -extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, diff --git a/include/linux/suspend.h b/include/linux/suspend.h index da6ebca3ff77..6a3f92098872 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -298,6 +298,11 @@ static inline void s2idle_set_ops(const struct platform_s2idle_ops *ops) {} static inline void s2idle_wake(void) {} #endif /* !CONFIG_SUSPEND */ +static inline bool pm_suspend_in_progress(void) +{ + return pm_suspend_target_state != PM_SUSPEND_ON; +} + /* struct pbe is used for creating lists of pages that should be restored * atomically during the resume from disk, because the page frames they have * occupied before the suspend are in use. @@ -441,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern void ksys_sync_helper(void); extern void pm_report_hw_sleep_time(u64 t); extern void pm_report_max_hw_sleep(u64 t); +void pm_restrict_gfp_mask(void); +void pm_restore_gfp_mask(void); #define pm_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ @@ -470,6 +477,8 @@ extern void pm_print_active_wakeup_sources(void); extern unsigned int lock_system_sleep(void); extern void unlock_system_sleep(unsigned int); +extern bool pm_sleep_transition_in_progress(void); + #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) @@ -485,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline void pm_report_hw_sleep_time(u64 t) {}; static inline void pm_report_max_hw_sleep(u64 t) {}; +static inline void pm_restrict_gfp_mask(void) {} +static inline void pm_restore_gfp_mask(void) {} + static inline void ksys_sync_helper(void) {} #define pm_notifier(fn, pri) do { (void)(fn); } while (0) @@ -498,6 +510,8 @@ static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline unsigned int lock_system_sleep(void) { return 0; } static inline void unlock_system_sleep(unsigned int flags) {} +static inline bool pm_sleep_transition_in_progress(void) { return false; } + #endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_SLEEP_DEBUG diff --git a/include/linux/swap.h b/include/linux/swap.h index f3e0ac20c2e8..bc0e1c275fc0 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -24,7 +24,6 @@ struct pagevec; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff -#define SWAP_FLAG_PRIO_SHIFT 0 #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ #define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ @@ -74,14 +73,13 @@ static inline int current_is_kswapd(void) * to a special SWP_DEVICE_{READ|WRITE} entry. * * When a page is mapped by the device for exclusive access we set the CPU page - * table entries to special SWP_DEVICE_EXCLUSIVE_* entries. + * table entries to a special SWP_DEVICE_EXCLUSIVE entry. */ #ifdef CONFIG_DEVICE_PRIVATE -#define SWP_DEVICE_NUM 4 +#define SWP_DEVICE_NUM 3 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) -#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) -#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3) +#define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2) #else #define SWP_DEVICE_NUM 0 #endif @@ -219,10 +217,10 @@ enum { SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */ SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */ /* add others here before... */ - SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL +#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10) #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* Bit flag in swap_map */ @@ -257,18 +255,27 @@ struct swap_cluster_info { u8 order; struct list_head list; }; -#define CLUSTER_FLAG_FREE 1 /* This cluster is free */ -#define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */ -#define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */ -#define CLUSTER_FLAG_FULL 8 /* This cluster is on full list */ + +/* All on-list cluster must have a non-zero flag. */ +enum swap_cluster_flags { + CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */ + CLUSTER_FLAG_FREE, + CLUSTER_FLAG_NONFULL, + CLUSTER_FLAG_FRAG, + /* Clusters with flags above are allocatable */ + CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG, + CLUSTER_FLAG_FULL, + CLUSTER_FLAG_DISCARD, + CLUSTER_FLAG_MAX, +}; /* * The first page in the swap file is the swap header, which is always marked * bad to prevent it from being allocated as an entry. This also prevents the * cluster to which it belongs being marked free. Therefore 0 is safe to use as - * a sentinel to indicate next is not valid in percpu_cluster. + * a sentinel to indicate an entry is not valid. */ -#define SWAP_NEXT_INVALID 0 +#define SWAP_ENTRY_INVALID 0 #ifdef CONFIG_THP_SWAP #define SWAP_NR_ORDERS (PMD_ORDER + 1) @@ -277,11 +284,10 @@ struct swap_cluster_info { #endif /* - * We assign a cluster to each CPU, so each CPU can allocate swap entry from - * its own cluster and swapout sequentially. The purpose is to optimize swapout - * throughput. + * We keep using same cluster for rotational device so IO will be sequential. + * The purpose is to optimize SWAP throughput on these device. */ -struct percpu_cluster { +struct swap_sequential_cluster { unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ }; @@ -304,15 +310,11 @@ struct swap_info_struct { /* list of cluster that contains at least one free slot */ struct list_head frag_clusters[SWAP_NR_ORDERS]; /* list of cluster that are fragmented or contented */ - unsigned int frag_cluster_nr[SWAP_NR_ORDERS]; - unsigned int lowest_bit; /* index of first free in swap_map */ - unsigned int highest_bit; /* index of last free in swap_map */ + atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS]; unsigned int pages; /* total of usable pages of swap */ - unsigned int inuse_pages; /* number of those currently in use */ - unsigned int cluster_next; /* likely index for next allocation */ - unsigned int cluster_nr; /* countdown to next cluster search */ - unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ - struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ + atomic_long_t inuse_pages; /* number of those currently in use */ + struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotating device */ + spinlock_t global_cluster_lock; /* Serialize usage of global cluster */ struct rb_root swap_extent_root;/* root of the swap extent rbtree */ struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ @@ -412,6 +414,10 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #define MEMCG_RECLAIM_PROACTIVE (1 << 2) #define MIN_SWAPPINESS 0 #define MAX_SWAPPINESS 200 + +/* Just recliam from anon folios in proactive memory reclaim */ +#define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1) + extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, @@ -426,19 +432,10 @@ extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); #ifdef CONFIG_NUMA -extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; -#else -#define node_reclaim_mode 0 #endif -static inline bool node_reclaim_enabled(void) -{ - /* Is any node_reclaim_mode bit set? */ - return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP); -} - void check_move_unevictable_folios(struct folio_batch *fbatch); extern void __meminit kswapd_run(int nid); @@ -457,13 +454,12 @@ static inline unsigned long total_swapcache_pages(void) } void free_swap_cache(struct folio *folio); -void free_page_and_swap_cache(struct page *); +void free_folio_and_swap_cache(struct folio *folio); void free_pages_and_swap_cache(struct encoded_page **, int); /* linux/mm/swapfile.c */ extern atomic_long_t nr_swap_pages; extern long total_swap_pages; extern atomic_t nr_rotate_swap; -extern bool has_usable_swap(void); /* Swap 50% full? Release swapcache more aggressively.. */ static inline bool vm_swap_full(void) @@ -477,24 +473,22 @@ static inline long get_nr_swap_pages(void) } extern void si_swapinfo(struct sysinfo *); -swp_entry_t folio_alloc_swap(struct folio *folio); +int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask); bool folio_free_swap(struct folio *folio); void put_swap_folio(struct folio *folio, swp_entry_t entry); extern swp_entry_t get_swap_page_of_type(int); -extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t, int); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t entry, int nr); extern void swap_free_nr(swp_entry_t entry, int nr_pages); -extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); int swap_type_of(dev_t device, sector_t offset); int find_first_swap(dev_t *device); extern unsigned int count_swap_pages(int, int); extern sector_t swapdev_block(int, pgoff_t); extern int __swap_count(swp_entry_t entry); -extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry); +extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); struct swap_info_struct *swp_swap_info(swp_entry_t entry); struct backing_dev_info; @@ -530,10 +524,8 @@ static inline void put_swap_device(struct swap_info_struct *si) #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) -/* only sparc can not include linux/pagemap.h in this file - * so leave put_page and release_pages undeclared... */ -#define free_page_and_swap_cache(page) \ - put_page(page) +#define free_folio_and_swap_cache(folio) \ + folio_put(folio) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr)); @@ -577,9 +569,9 @@ static inline int __swap_count(swp_entry_t entry) return 0; } -static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry) +static inline bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry) { - return 0; + return false; } static inline int swp_swapcount(swp_entry_t entry) @@ -587,11 +579,9 @@ static inline int swp_swapcount(swp_entry_t entry) return 0; } -static inline swp_entry_t folio_alloc_swap(struct folio *folio) +static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask) { - swp_entry_t entry; - entry.val = 0; - return entry; + return -EINVAL; } static inline bool folio_free_swap(struct folio *folio) @@ -652,7 +642,6 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp) #endif #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry); int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry); static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) @@ -673,10 +662,6 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); extern bool mem_cgroup_swap_full(struct folio *folio); #else -static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) -{ -} - static inline int mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) { diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h index ae73a87775b3..91cdf12190a0 100644 --- a/include/linux/swap_cgroup.h +++ b/include/linux/swap_cgroup.h @@ -6,10 +6,8 @@ #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) -extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, - unsigned short old, unsigned short new); -extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, - unsigned int nr_ents); +extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent); +extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents); extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); extern int swap_cgroup_swapon(int type, unsigned long max_pages); extern void swap_cgroup_swapoff(int type); @@ -17,8 +15,12 @@ extern void swap_cgroup_swapoff(int type); #else static inline -unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, - unsigned int nr_ents) +void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent) +{ +} + +static inline +unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents) { return 0; } diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h deleted file mode 100644 index 15adfb8c813a..000000000000 --- a/include/linux/swap_slots.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SWAP_SLOTS_H -#define _LINUX_SWAP_SLOTS_H - -#include <linux/swap.h> -#include <linux/spinlock.h> -#include <linux/mutex.h> - -#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH -#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE) -#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE) - -struct swap_slots_cache { - bool lock_initialized; - struct mutex alloc_lock; /* protects slots, nr, cur */ - swp_entry_t *slots; - int nr; - int cur; - spinlock_t free_lock; /* protects slots_ret, n_ret */ - swp_entry_t *slots_ret; - int n_ret; -}; - -void disable_swap_slots_cache_lock(void); -void reenable_swap_slots_cache_unlock(void); -void enable_swap_slots_cache(void); -void free_swap_slot(swp_entry_t entry); - -extern bool swap_slot_cache_enabled; - -#endif /* _LINUX_SWAP_SLOTS_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 96f26e29fefe..64ea151a7ae3 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -186,26 +186,16 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } -static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) +static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { - return swp_entry(SWP_DEVICE_EXCLUSIVE_READ, offset); -} - -static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) -{ - return swp_entry(SWP_DEVICE_EXCLUSIVE_WRITE, offset); + return swp_entry(SWP_DEVICE_EXCLUSIVE, offset); } static inline bool is_device_exclusive_entry(swp_entry_t entry) { - return swp_type(entry) == SWP_DEVICE_EXCLUSIVE_READ || - swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE; + return swp_type(entry) == SWP_DEVICE_EXCLUSIVE; } -static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_DEVICE_EXCLUSIVE_WRITE); -} #else /* CONFIG_DEVICE_PRIVATE */ static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { @@ -227,12 +217,7 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry) return false; } -static inline swp_entry_t make_readable_device_exclusive_entry(pgoff_t offset) -{ - return swp_entry(0, 0); -} - -static inline swp_entry_t make_writable_device_exclusive_entry(pgoff_t offset) +static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(0, 0); } @@ -242,10 +227,6 @@ static inline bool is_device_exclusive_entry(swp_entry_t entry) return false; } -static inline bool is_writable_device_exclusive_entry(swp_entry_t entry) -{ - return false; -} #endif /* CONFIG_DEVICE_PRIVATE */ #ifdef CONFIG_MIGRATION diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c6333204d451..e5603cc91963 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -951,6 +951,10 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); +asmlinkage long sys_open_tree_attr(int dfd, const char __user *path, + unsigned flags, + struct mount_attr __user *uattr, + size_t usize); asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, int to_dfd, const char __user *to_path, unsigned int ms_flags); @@ -1266,14 +1270,14 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, AT_SYMLINK_NOFOLLOW); } -extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); +int do_sys_ftruncate(unsigned int fd, loff_t length, int small); static inline long ksys_ftruncate(unsigned int fd, loff_t length) { return do_sys_ftruncate(fd, length, 1); } -extern long do_sys_truncate(const char __user *pathname, loff_t length); +int do_sys_truncate(const char __user *pathname, loff_t length); static inline long ksys_truncate(const char __user *pathname, loff_t length) { diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 0f2fcd244523..f418aae4f113 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -107,7 +107,7 @@ struct attribute_group { int); struct attribute **attrs; union { - struct bin_attribute **bin_attrs; + const struct bin_attribute *const *bin_attrs; const struct bin_attribute *const *bin_attrs_new; }; }; @@ -293,7 +293,7 @@ __ATTRIBUTE_GROUPS(_name) #define BIN_ATTRIBUTE_GROUPS(_name) \ static const struct attribute_group _name##_group = { \ - .bin_attrs = _name##_attrs, \ + .bin_attrs_new = _name##_attrs, \ }; \ __ATTRIBUTE_GROUPS(_name) @@ -306,11 +306,11 @@ struct bin_attribute { size_t size; void *private; struct address_space *(*f_mapping)(void); - ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *, + ssize_t (*read)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); ssize_t (*read_new)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); - ssize_t (*write)(struct file *, struct kobject *, struct bin_attribute *, + ssize_t (*write)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); ssize_t (*write_new)(struct file *, struct kobject *, const struct bin_attribute *, char *, loff_t, size_t); @@ -332,28 +332,11 @@ struct bin_attribute { */ #define sysfs_bin_attr_init(bin_attr) sysfs_attr_init(&(bin_attr)->attr) -typedef ssize_t __sysfs_bin_rw_handler_new(struct file *, struct kobject *, - const struct bin_attribute *, char *, loff_t, size_t); - /* macros to create static binary attributes easier */ #define __BIN_ATTR(_name, _mode, _read, _write, _size) { \ .attr = { .name = __stringify(_name), .mode = _mode }, \ - .read = _Generic(_read, \ - __sysfs_bin_rw_handler_new * : NULL, \ - default : _read \ - ), \ - .read_new = _Generic(_read, \ - __sysfs_bin_rw_handler_new * : _read, \ - default : NULL \ - ), \ - .write = _Generic(_write, \ - __sysfs_bin_rw_handler_new * : NULL, \ - default : _write \ - ), \ - .write_new = _Generic(_write, \ - __sysfs_bin_rw_handler_new * : _write, \ - default : NULL \ - ), \ + .read = _read, \ + .write = _write, \ .size = _size, \ } @@ -511,7 +494,7 @@ __printf(3, 4) int sysfs_emit_at(char *buf, int at, const char *fmt, ...); ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count); #else /* CONFIG_SYSFS */ @@ -774,7 +757,7 @@ static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) static inline ssize_t sysfs_bin_attr_simple_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h deleted file mode 100644 index 5cf77dbb8d86..000000000000 --- a/include/linux/sysv_fs.h +++ /dev/null @@ -1,214 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SYSV_FS_H -#define _LINUX_SYSV_FS_H - -#define __packed2__ __attribute__((packed, aligned(2))) - - -#ifndef __KERNEL__ -typedef u16 __fs16; -typedef u32 __fs16; -#endif - -/* inode numbers are 16 bit */ -typedef __fs16 sysv_ino_t; - -/* Block numbers are 24 bit, sometimes stored in 32 bit. - On Coherent FS, they are always stored in PDP-11 manner: the least - significant 16 bits come last. */ -typedef __fs32 sysv_zone_t; - -/* 0 is non-existent */ -#define SYSV_BADBL_INO 1 /* inode of bad blocks file */ -#define SYSV_ROOT_INO 2 /* inode of root directory */ - - -/* Xenix super-block data on disk */ -#define XENIX_NICINOD 100 /* number of inode cache entries */ -#define XENIX_NICFREE 100 /* number of free block list chunk entries */ -struct xenix_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= XENIX_NICFREE */ - sysv_zone_t s_free[XENIX_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= XENIX_NICINOD */ - sysv_ino_t s_inode[XENIX_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - __fs16 s_dinfo[4]; /* device information ?? */ - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - char s_clean; /* set to 0x46 when filesystem is properly unmounted */ - char s_fill[371]; - s32 s_magic; /* version of file system */ - __fs32 s_type; /* type of file system: 1 for 512 byte blocks - 2 for 1024 byte blocks - 3 for 2048 byte blocks */ - -}; - -/* - * SystemV FS comes in two variants: - * sysv2: System V Release 2 (e.g. Microport), structure elements aligned(2). - * sysv4: System V Release 4 (e.g. Consensys), structure elements aligned(4). - */ -#define SYSV_NICINOD 100 /* number of inode cache entries */ -#define SYSV_NICFREE 50 /* number of free block list chunk entries */ - -/* SystemV4 super-block data on disk */ -struct sysv4_super_block { - __fs16 s_isize; /* index of first data zone */ - u16 s_pad0; - __fs32 s_fsize; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */ - u16 s_pad1; - sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */ - u16 s_pad2; - sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time; /* time of last super block update */ - __fs16 s_dinfo[4]; /* device information ?? */ - __fs32 s_tfree; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - u16 s_pad3; - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - s32 s_fill[12]; - __fs32 s_state; /* file system state: 0x7c269d38-s_time means clean */ - s32 s_magic; /* version of file system */ - __fs32 s_type; /* type of file system: 1 for 512 byte blocks - 2 for 1024 byte blocks */ -}; - -/* SystemV2 super-block data on disk */ -struct sysv2_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= SYSV_NICFREE */ - sysv_zone_t s_free[SYSV_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= SYSV_NICINOD */ - sysv_ino_t s_inode[SYSV_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - __fs16 s_dinfo[4]; /* device information ?? */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - s32 s_fill[14]; - __fs32 s_state; /* file system state: 0xcb096f43 means clean */ - s32 s_magic; /* version of file system */ - __fs32 s_type; /* type of file system: 1 for 512 byte blocks - 2 for 1024 byte blocks */ -}; - -/* V7 super-block data on disk */ -#define V7_NICINOD 100 /* number of inode cache entries */ -#define V7_NICFREE 50 /* number of free block list chunk entries */ -struct v7_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= V7_NICFREE */ - sysv_zone_t s_free[V7_NICFREE]; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= V7_NICINOD */ - sysv_ino_t s_inode[V7_NICINOD]; /* some free inodes */ - /* locks, not used by Linux or V7: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - /* the following fields are not maintained by V7: */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - __fs16 s_m; /* interleave factor */ - __fs16 s_n; /* interleave factor */ - char s_fname[6]; /* file system name */ - char s_fpack[6]; /* file system pack name */ -}; -/* Constants to aid sanity checking */ -/* This is not a hard limit, nor enforced by v7 kernel. It's actually just - * the limit used by Seventh Edition's ls, though is high enough to assume - * that no reasonable file system would have that much entries in root - * directory. Thus, if we see anything higher, we just probably got the - * endiannes wrong. */ -#define V7_NFILES 1024 -/* The disk addresses are three-byte (despite direct block addresses being - * aligned word-wise in inode). If the most significant byte is non-zero, - * something is most likely wrong (not a filesystem, bad bytesex). */ -#define V7_MAXSIZE 0x00ffffff - -/* Coherent super-block data on disk */ -#define COH_NICINOD 100 /* number of inode cache entries */ -#define COH_NICFREE 64 /* number of free block list chunk entries */ -struct coh_super_block { - __fs16 s_isize; /* index of first data zone */ - __fs32 s_fsize __packed2__; /* total number of zones of this fs */ - /* the start of the free block list: */ - __fs16 s_nfree; /* number of free blocks in s_free, <= COH_NICFREE */ - sysv_zone_t s_free[COH_NICFREE] __packed2__; /* first free block list chunk */ - /* the cache of free inodes: */ - __fs16 s_ninode; /* number of free inodes in s_inode, <= COH_NICINOD */ - sysv_ino_t s_inode[COH_NICINOD]; /* some free inodes */ - /* locks, not used by Linux: */ - char s_flock; /* lock during free block list manipulation */ - char s_ilock; /* lock during inode cache manipulation */ - char s_fmod; /* super-block modified flag */ - char s_ronly; /* flag whether fs is mounted read-only */ - __fs32 s_time __packed2__; /* time of last super block update */ - __fs32 s_tfree __packed2__; /* total number of free zones */ - __fs16 s_tinode; /* total number of free inodes */ - __fs16 s_interleave_m; /* interleave factor */ - __fs16 s_interleave_n; - char s_fname[6]; /* file system volume name */ - char s_fpack[6]; /* file system pack name */ - __fs32 s_unique; /* zero, not used */ -}; - -/* SystemV/Coherent inode data on disk */ -struct sysv_inode { - __fs16 i_mode; - __fs16 i_nlink; - __fs16 i_uid; - __fs16 i_gid; - __fs32 i_size; - u8 i_data[3*(10+1+1+1)]; - u8 i_gen; - __fs32 i_atime; /* time of last access */ - __fs32 i_mtime; /* time of last modification */ - __fs32 i_ctime; /* time of creation */ -}; - -/* SystemV/Coherent directory entry on disk */ -#define SYSV_NAMELEN 14 /* max size of name in struct sysv_dir_entry */ -struct sysv_dir_entry { - sysv_ino_t inode; - char name[SYSV_NAMELEN]; /* up to 14 characters, the rest are zeroes */ -}; - -#define SYSV_DIRSIZE sizeof(struct sysv_dir_entry) /* size of every directory entry */ - -#endif /* _LINUX_SYSV_FS_H */ diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 2964171856e0..0646804860ff 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -19,9 +19,6 @@ enum task_work_notify_mode { TWA_SIGNAL, TWA_SIGNAL_NO_IPI, TWA_NMI_CURRENT, - - TWA_FLAGS = 0xff00, - TWAF_NO_ALLOC = 0x0100, }; static inline bool task_work_pending(struct task_struct *task) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index f88daaa76d83..29f59d50dc73 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -160,6 +160,8 @@ struct tcp_request_sock { u32 rcv_isn; u32 snt_isn; u32 ts_off; + u32 snt_tsval_first; + u32 snt_tsval_last; u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# @@ -242,6 +244,9 @@ struct tcp_sock { struct minmax rtt_min; /* OOO segments go in this rbtree. Socket lock must be held. */ struct rb_root out_of_order_queue; +#if defined(CONFIG_TLS_DEVICE) + void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq); +#endif u32 snd_ssthresh; /* Slow start size threshold */ u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */ __cacheline_group_end(tcp_sock_read_rx); @@ -335,7 +340,7 @@ struct tcp_sock { } rcv_rtt_est; /* Receiver queue space */ struct { - u32 space; + int space; u32 seq; u64 time; } rcvq_space; @@ -380,7 +385,8 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ - syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + syn_fastopen_child:1; /* created TFO passive child socket */ u8 keepalive_probes; /* num of allowed keep alive probes */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h deleted file mode 100644 index a5acc768085d..000000000000 --- a/include/linux/tfrc.h +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _LINUX_TFRC_H_ -#define _LINUX_TFRC_H_ -/* - * TFRC - Data Structures for the TCP-Friendly Rate Control congestion - * control mechanism as specified in RFC 3448. - * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz> - * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - */ -#include <linux/types.h> - -/** tfrc_rx_info - TFRC Receiver Data Structure - * - * @tfrcrx_x_recv: receiver estimate of sending rate (3.2.2) - * @tfrcrx_rtt: round-trip-time (communicated by sender) - * @tfrcrx_p: current estimate of loss event rate (3.2.2) - */ -struct tfrc_rx_info { - __u32 tfrcrx_x_recv; - __u32 tfrcrx_rtt; - __u32 tfrcrx_p; -}; - -/** tfrc_tx_info - TFRC Sender Data Structure - * - * @tfrctx_x: computed transmit rate (4.3 (4)) - * @tfrctx_x_recv: receiver estimate of send rate (4.3) - * @tfrctx_x_calc: return value of throughput equation (3.1) - * @tfrctx_rtt: (moving average) estimate of RTT (4.3) - * @tfrctx_p: current loss event rate (5.4) - * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3) - * @tfrctx_ipi: inter-packet interval (4.6) - * - * Note: X and X_recv are both maintained in units of 64 * bytes/second. This - * enables a finer resolution of sending rates and avoids problems with - * integer arithmetic; u32 is not sufficient as scaling consumes 6 bits. - */ -struct tfrc_tx_info { - __u64 tfrctx_x; - __u64 tfrctx_x_recv; - __u32 tfrctx_x_calc; - __u32 tfrctx_rtt; - __u32 tfrctx_p; - __u32 tfrctx_rto; - __u32 tfrctx_ipi; -}; - -#endif /* _LINUX_TFRC_H_ */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 754802478b96..0b5ed6821080 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -86,8 +86,6 @@ struct thermal_trip { #define THERMAL_TRIP_PRIV_TO_INT(_val_) (uintptr_t)(_val_) #define THERMAL_INT_TO_TRIP_PRIV(_val_) (void *)(uintptr_t)(_val_) -struct thermal_zone_device; - struct cooling_spec { unsigned long upper; /* Highest cooling state */ unsigned long lower; /* Lowest cooling state */ @@ -295,6 +293,10 @@ static inline struct thermal_zone_device *thermal_tripless_zone_device_register( static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } +static inline void thermal_zone_device_update(struct thermal_zone_device *tz, + enum thermal_notify_event event) +{ } + static inline struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index cf2446c9c30d..dd925d84fa46 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -217,54 +217,6 @@ static inline int arch_within_stack_frames(const void * const stack, } #endif -#ifdef CONFIG_HARDENED_USERCOPY -extern void __check_object_size(const void *ptr, unsigned long n, - bool to_user); - -static __always_inline void check_object_size(const void *ptr, unsigned long n, - bool to_user) -{ - if (!__builtin_constant_p(n)) - __check_object_size(ptr, n, to_user); -} -#else -static inline void check_object_size(const void *ptr, unsigned long n, - bool to_user) -{ } -#endif /* CONFIG_HARDENED_USERCOPY */ - -extern void __compiletime_error("copy source size is too small") -__bad_copy_from(void); -extern void __compiletime_error("copy destination size is too small") -__bad_copy_to(void); - -void __copy_overflow(int size, unsigned long count); - -static inline void copy_overflow(int size, unsigned long count) -{ - if (IS_ENABLED(CONFIG_BUG)) - __copy_overflow(size, count); -} - -static __always_inline __must_check bool -check_copy_size(const void *addr, size_t bytes, bool is_source) -{ - int sz = __builtin_object_size(addr, 0); - if (unlikely(sz >= 0 && sz < bytes)) { - if (!__builtin_constant_p(bytes)) - copy_overflow(sz, bytes); - else if (is_source) - __bad_copy_from(); - else - __bad_copy_to(); - return false; - } - if (WARN_ON_ONCE(bytes > INT_MAX)) - return false; - check_object_size(addr, bytes, is_source); - return true; -} - #ifndef arch_setup_new_exec static inline void arch_setup_new_exec(void) { } #endif diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 7d902d8c054b..75247486616b 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -11,6 +11,13 @@ #ifndef THUNDERBOLT_H_ #define THUNDERBOLT_H_ +#include <linux/types.h> + +struct fwnode_handle; +struct device; + +#if IS_REACHABLE(CONFIG_USB4) + #include <linux/device.h> #include <linux/idr.h> #include <linux/list.h> @@ -674,4 +681,15 @@ static inline struct device *tb_ring_dma_device(struct tb_ring *ring) return &ring->nhi->pdev->dev; } +bool usb4_usb3_port_match(struct device *usb4_port_dev, + const struct fwnode_handle *usb3_port_fwnode); + +#else /* CONFIG_USB4 */ +static inline bool usb4_usb3_port_match(struct device *usb4_port_dev, + const struct fwnode_handle *usb3_port_fwnode) +{ + return false; +} +#endif /* CONFIG_USB4 */ + #endif /* THUNDERBOLT_H_ */ diff --git a/include/linux/tick.h b/include/linux/tick.h index b8ddc8e631a3..ac76ae9fa36d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -195,12 +195,6 @@ static inline bool tick_nohz_full_enabled(void) __ret; \ }) -static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) -{ - if (tick_nohz_full_enabled()) - cpumask_or(mask, mask, tick_nohz_full_mask); -} - extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); @@ -281,7 +275,6 @@ extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } -static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } diff --git a/include/linux/time64.h b/include/linux/time64.h index f1bcea8c124a..9934331c7b86 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -49,6 +49,11 @@ static inline int timespec64_equal(const struct timespec64 *a, return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } +static inline bool timespec64_is_epoch(const struct timespec64 *ts) +{ + return ts->tv_sec == 0 && ts->tv_nsec == 0; +} + /* * lhs < rhs: return <0 * lhs == rhs: return 0 diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h index 876e31b4461d..bb2c52f4fc94 100644 --- a/include/linux/time_namespace.h +++ b/include/linux/time_namespace.h @@ -12,6 +12,7 @@ struct user_namespace; extern struct user_namespace init_user_ns; +struct seq_file; struct vm_area_struct; struct timens_offsets { @@ -165,6 +166,4 @@ static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) } #endif -struct vdso_data *arch_get_vdso_data(void *vvar_page); - #endif /* _LINUX_TIMENS_H */ diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index e39d4d563b19..785048a3b3e6 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -51,7 +51,7 @@ struct tk_read_base { * @offs_real: Offset clock monotonic -> clock realtime * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai - * @tai_offset: The current UTC to TAI offset in seconds + * @coarse_nsec: The nanoseconds part for coarse time getters * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @clock_was_set_seq: The sequence number of clock was set events @@ -76,6 +76,7 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second + * @tai_offset: The current UTC to TAI offset in seconds * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -100,7 +101,7 @@ struct tk_read_base { * which results in the following cacheline layout: * * 0: seqcount, tkr_mono - * 1: xtime_sec ... tai_offset + * 1: xtime_sec ... coarse_nsec * 2: tkr_raw, raw_sec * 3,4: Internal variables * @@ -121,7 +122,7 @@ struct timekeeper { ktime_t offs_real; ktime_t offs_boot; ktime_t offs_tai; - s32 tai_offset; + u32 coarse_nsec; /* Cacheline 2: */ struct tk_read_base tkr_raw; @@ -144,6 +145,7 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; + s32 tai_offset; }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 0e035f675efe..542773650200 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -264,18 +264,6 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); /** - * struct ktime_timestamps - Simultaneous mono/boot/real timestamps - * @mono: Monotonic timestamp - * @boot: Boottime timestamp - * @real: Realtime timestamp - */ -struct ktime_timestamps { - u64 mono; - u64 boot; - u64 real; -}; - -/** * struct system_time_snapshot - simultaneous raw/real time capture with * counter value * @cycles: Clocksource counter value to produce the system times @@ -345,9 +333,6 @@ extern int get_device_system_crosststamp( */ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); -/* NMI safe mono/boot/realtime timestamps */ -extern void ktime_get_fast_timestamps(struct ktime_timestamps *snap); - /* * Persistent clock related interfaces */ diff --git a/include/linux/timer.h b/include/linux/timer.h index e67ecd1cbc97..0414d9e6b4fc 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -30,7 +30,7 @@ * * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and * it's safe to wait for the completion of the running instance from - * IRQ handlers, for example, by calling del_timer_sync(). + * IRQ handlers, for example, by calling timer_delete_sync(). * * Note: The irq disabled callback execution is a special case for * workqueue locking issues. It's not meant for executing random crap @@ -67,44 +67,44 @@ /* * LOCKDEP and DEBUG timer interfaces. */ -void init_timer_key(struct timer_list *timer, +void timer_init_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void init_timer_on_stack_key(struct timer_list *timer, +extern void timer_init_key_on_stack(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #else -static inline void init_timer_on_stack_key(struct timer_list *timer, +static inline void timer_init_key_on_stack(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { - init_timer_key(timer, func, flags, name, key); + timer_init_key(timer, func, flags, name, key); } #endif #ifdef CONFIG_LOCKDEP -#define __init_timer(_timer, _fn, _flags) \ +#define __timer_init(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_key((_timer), (_fn), (_flags), #_timer, &__key);\ + timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\ } while (0) -#define __init_timer_on_stack(_timer, _fn, _flags) \ +#define __timer_init_on_stack(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_on_stack_key((_timer), (_fn), (_flags), \ + timer_init_key_on_stack((_timer), (_fn), (_flags), \ #_timer, &__key); \ } while (0) #else -#define __init_timer(_timer, _fn, _flags) \ - init_timer_key((_timer), (_fn), (_flags), NULL, NULL) -#define __init_timer_on_stack(_timer, _fn, _flags) \ - init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL) +#define __timer_init(_timer, _fn, _flags) \ + timer_init_key((_timer), (_fn), (_flags), NULL, NULL) +#define __timer_init_on_stack(_timer, _fn, _flags) \ + timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL) #endif /** @@ -115,21 +115,21 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, * * Regular timer initialization should use either DEFINE_TIMER() above, * or timer_setup(). For timers on the stack, timer_setup_on_stack() must - * be used and must be balanced with a call to destroy_timer_on_stack(). + * be used and must be balanced with a call to timer_destroy_on_stack(). */ #define timer_setup(timer, callback, flags) \ - __init_timer((timer), (callback), (flags)) + __timer_init((timer), (callback), (flags)) #define timer_setup_on_stack(timer, callback, flags) \ - __init_timer_on_stack((timer), (callback), (flags)) + __timer_init_on_stack((timer), (callback), (flags)) #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void destroy_timer_on_stack(struct timer_list *timer); +extern void timer_destroy_on_stack(struct timer_list *timer); #else -static inline void destroy_timer_on_stack(struct timer_list *timer) { } +static inline void timer_destroy_on_stack(struct timer_list *timer) { } #endif -#define from_timer(var, callback_timer, timer_fieldname) \ +#define timer_container_of(var, callback_timer, timer_fieldname) \ container_of(callback_timer, typeof(*var), timer_fieldname) /** @@ -156,62 +156,26 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); * The jiffies value which is added to now, when there is no timer * in the timer wheel: */ -#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) +#define TIMER_NEXT_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); extern void add_timer_local(struct timer_list *timer); extern void add_timer_global(struct timer_list *timer); -extern int try_to_del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync_try(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); extern int timer_shutdown(struct timer_list *timer); -/** - * del_timer_sync - Delete a pending timer and wait for a running callback - * @timer: The timer to be deleted - * - * See timer_delete_sync() for detailed explanation. - * - * Do not use in new code. Use timer_delete_sync() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer_sync(struct timer_list *timer) -{ - return timer_delete_sync(timer); -} - -/** - * del_timer - Delete a pending timer - * @timer: The timer to be deleted - * - * See timer_delete() for detailed explanation. - * - * Do not use in new code. Use timer_delete() instead. - * - * Returns: - * * %0 - The timer was not pending - * * %1 - The timer was pending and deactivated - */ -static inline int del_timer(struct timer_list *timer) -{ - return timer_delete(timer); -} - -extern void init_timers(void); +extern void timers_init(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); -unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); -unsigned long __round_jiffies_up(unsigned long j, int cpu); unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); diff --git a/include/linux/topology.h b/include/linux/topology.h index 52f5850730b3..33b7fda97d39 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -29,6 +29,7 @@ #include <linux/arch_topology.h> #include <linux/cpumask.h> +#include <linux/nodemask.h> #include <linux/bitops.h> #include <linux/mmzone.h> #include <linux/smp.h> @@ -39,10 +40,6 @@ #define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) #endif -#define for_each_node_with_cpus(node) \ - for_each_online_node(node) \ - if (nr_cpus_node(node)) - int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ @@ -240,6 +237,29 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) } #endif +#ifndef topology_is_primary_thread + +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + /* + * When disabling SMT, the primary thread of the SMT will remain + * enabled/active. Architectures that have a special primary thread + * (e.g. x86) need to override this function. Otherwise the first + * thread in the SMT can be made the primary thread. + * + * The sibling cpumask of an offline CPU always contains the CPU + * itself on architectures using the implementation of + * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology. + * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for + * building their topology have to check whether to use this default + * implementation or to override it. + */ + return cpu == cpumask_first(topology_sibling_cpumask(cpu)); +} +#define topology_is_primary_thread topology_is_primary_thread + +#endif + static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); @@ -262,6 +282,36 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) #endif /* CONFIG_NUMA */ /** + * for_each_node_numadist() - iterate over nodes in increasing distance + * order, starting from a given node + * @node: the iteration variable and the starting node. + * @unvisited: a nodemask to keep track of the unvisited nodes. + * + * This macro iterates over NUMA node IDs in increasing distance from the + * starting @node and yields MAX_NUMNODES when all the nodes have been + * visited. + * + * Note that by the time the loop completes, the @unvisited nodemask will + * be fully cleared, unless the loop exits early. + * + * The difference between for_each_node() and for_each_node_numadist() is + * that the former allows to iterate over nodes in numerical order, whereas + * the latter iterates over nodes in increasing order of distance. + * + * This complexity of this iterator is O(N^2), where N represents the + * number of nodes, as each iteration involves scanning all nodes to + * find the one with the shortest distance. + * + * Requires rcu_lock to be held. + */ +#define for_each_node_numadist(node, unvisited) \ + for (int __start = (node), \ + (node) = nearest_node_nodemask((__start), &(unvisited)); \ + (node) < MAX_NUMNODES; \ + node_clear((node), (unvisited)), \ + (node) = nearest_node_nodemask((__start), &(unvisited))) + +/** * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance * from a given node. * @mask: the iteration variable. @@ -279,4 +329,13 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) !IS_ERR_OR_NULL(mask); \ __hops++) +DECLARE_PER_CPU(unsigned long, cpu_scale); + +static inline unsigned long topology_get_cpu_scale(int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); + #endif /* _LINUX_TOPOLOGY_H */ diff --git a/include/linux/torture.h b/include/linux/torture.h index c2e979f82f8d..1b59056c3b18 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -104,6 +104,7 @@ int torture_stutter_init(int s, int sgap); /* Initialization and cleanup. */ bool torture_init_begin(char *ttype, int v); void torture_init_end(void); +unsigned long get_torture_init_jiffies(void); bool torture_cleanup_begin(void); void torture_cleanup_end(void); bool torture_must_stop(void); @@ -130,7 +131,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #endif #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); #endif #endif /* __LINUX_TORTURE_H */ diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 20a40ade8030..a3d8305e88a5 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -224,7 +224,7 @@ enum tpm2_const { enum tpm2_timeouts { TPM2_TIMEOUT_A = 750, - TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_B = 4000, TPM2_TIMEOUT_C = 200, TPM2_TIMEOUT_D = 30, TPM2_DURATION_SHORT = 20, @@ -257,6 +257,7 @@ enum tpm2_return_codes { TPM2_RC_TESTING = 0x090A, /* RC_WARN */ TPM2_RC_REFERENCE_H0 = 0x0910, TPM2_RC_RETRY = 0x0922, + TPM2_RC_SESSION_MEMORY = 0x0903, }; enum tpm2_command_codes { @@ -335,6 +336,7 @@ enum tpm2_cc_attrs { #define TPM_VID_WINBOND 0x1050 #define TPM_VID_STM 0x104A #define TPM_VID_ATML 0x1114 +#define TPM_VID_IFX 0x15D1 enum tpm_chip_flags { TPM_CHIP_FLAG_BOOTSTRAPPED = BIT(0), @@ -436,6 +438,24 @@ static inline u32 tpm2_rc_value(u32 rc) return (rc & BIT(7)) ? rc & 0xbf : rc; } +/* + * Convert a return value from tpm_transmit_cmd() to POSIX error code. + */ +static inline ssize_t tpm_ret_to_err(ssize_t ret) +{ + if (ret < 0) + return ret; + + switch (tpm2_rc_value(ret)) { + case TPM2_RC_SUCCESS: + return 0; + case TPM2_RC_SESSION_MEMORY: + return -ENOMEM; + default: + return -EFAULT; + } +} + #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); diff --git a/include/linux/tpm_svsm.h b/include/linux/tpm_svsm.h new file mode 100644 index 000000000000..38e341f9761a --- /dev/null +++ b/include/linux/tpm_svsm.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 James.Bottomley@HansenPartnership.com + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * + * Helpers for the SVSM_VTPM_CMD calls used by the vTPM protocol defined by the + * AMD SVSM spec [1]. + * + * The vTPM protocol follows the Official TPM 2.0 Reference Implementation + * (originally by Microsoft, now part of the TCG) simulator protocol. + * + * [1] "Secure VM Service Module for SEV-SNP Guests" + * Publication # 58019 Revision: 1.00 + */ +#ifndef _TPM_SVSM_H_ +#define _TPM_SVSM_H_ + +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> + +#define SVSM_VTPM_MAX_BUFFER 4096 /* max req/resp buffer size */ + +/** + * struct svsm_vtpm_request - Generic request for single word command + * @cmd: The command to send + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 15: vTPM Common Request/Response Structure + * Byte Size    In/Out    Description + * Offset    (Bytes) + * 0x000     4          In        Platform command + *                        Out       Platform command response size + */ +struct svsm_vtpm_request { + u32 cmd; +}; + +/** + * struct svsm_vtpm_response - Generic response + * @size: The response size (zero if nothing follows) + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 15: vTPM Common Request/Response Structure + * Byte Size    In/Out    Description + * Offset    (Bytes) + * 0x000     4          In        Platform command + *                        Out       Platform command response size + * + * Note: most TCG Simulator commands simply return zero here with no indication + * of success or failure. + */ +struct svsm_vtpm_response { + u32 size; +}; + +/** + * struct svsm_vtpm_cmd_request - Structure for a TPM_SEND_COMMAND request + * @cmd: The command to send (must be TPM_SEND_COMMAND) + * @locality: The locality + * @buf_size: The size of the input buffer following + * @buf: A buffer of size buf_size + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 16: TPM_SEND_COMMAND Request Structure + * Byte Size Meaning + * Offset    (Bytes) + * 0x000     4          Platform command (8) + * 0x004     1          Locality (must-be-0) + * 0x005     4          TPM Command size (in bytes) + * 0x009     Variable   TPM Command + * + * Note: the TCG Simulator expects @buf_size to be equal to the size of the + * specific TPM command, otherwise an TPM_RC_COMMAND_SIZE error is returned. + */ +struct svsm_vtpm_cmd_request { + u32 cmd; + u8 locality; + u32 buf_size; + u8 buf[]; +} __packed; + +/** + * struct svsm_vtpm_cmd_response - Structure for a TPM_SEND_COMMAND response + * @buf_size: The size of the output buffer following + * @buf: A buffer of size buf_size + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 17: TPM_SEND_COMMAND Response Structure + * Byte Size Meaning + * Offset    (Bytes) + * 0x000     4          Response size (in bytes) + * 0x004     Variable   Response + */ +struct svsm_vtpm_cmd_response { + u32 buf_size; + u8 buf[]; +}; + +/** + * svsm_vtpm_cmd_request_fill() - Fill a TPM_SEND_COMMAND request to be sent to SVSM + * @req: The struct svsm_vtpm_cmd_request to fill + * @locality: The locality + * @buf: The buffer from where to copy the payload of the command + * @len: The size of the buffer + * + * Return: 0 on success, negative error code on failure. + */ +static inline int +svsm_vtpm_cmd_request_fill(struct svsm_vtpm_cmd_request *req, u8 locality, + const u8 *buf, size_t len) +{ + if (len > SVSM_VTPM_MAX_BUFFER - sizeof(*req)) + return -EINVAL; + + req->cmd = 8; /* TPM_SEND_COMMAND */ + req->locality = locality; + req->buf_size = len; + + memcpy(req->buf, buf, len); + + return 0; +} + +/** + * svsm_vtpm_cmd_response_parse() - Parse a TPM_SEND_COMMAND response received from SVSM + * @resp: The struct svsm_vtpm_cmd_response to parse + * @buf: The buffer where to copy the response + * @len: The size of the buffer + * + * Return: buffer size filled with the response on success, negative error + * code on failure. + */ +static inline int +svsm_vtpm_cmd_response_parse(const struct svsm_vtpm_cmd_response *resp, u8 *buf, + size_t len) +{ + if (len < resp->buf_size) + return -E2BIG; + + if (resp->buf_size > SVSM_VTPM_MAX_BUFFER - sizeof(*resp)) + return -EINVAL; // Invalid response from the platform TPM + + memcpy(buf, resp->buf, resp->buf_size); + + return resp->buf_size; +} + +#endif /* _TPM_SVSM_H_ */ diff --git a/include/linux/trace.h b/include/linux/trace.h index fdcd76b7be83..7eaad857dee0 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -72,8 +72,8 @@ static inline int unregister_ftrace_export(struct trace_export *export) static inline void trace_printk_init_buffers(void) { } -static inline int trace_array_printk(struct trace_array *tr, unsigned long ip, - const char *fmt, ...) +static inline __printf(3, 4) +int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { return 0; } diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 58ad4ead33fc..fa9cf4292dff 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -673,6 +673,20 @@ struct trace_event_file { atomic_t tm_ref; /* trigger-mode reference counter */ }; +#ifdef CONFIG_HIST_TRIGGERS +extern struct irq_work hist_poll_work; +extern wait_queue_head_t hist_poll_wq; + +static inline void hist_poll_wakeup(void) +{ + if (wq_has_sleeper(&hist_poll_wq)) + irq_work_queue(&hist_poll_work); +} + +#define hist_poll_wait(file, wait) \ + poll_wait(file, &hist_poll_wq, wait) +#endif + #define __TRACE_EVENT_FLAGS(name, value) \ static int __init trace_init_flags_##name(void) \ { \ @@ -845,24 +859,6 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, const char *event, bool enable); -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement optimizing out. - */ -#define event_trace_printk(ip, fmt, args...) \ -do { \ - __trace_printk_check_format(fmt, ##args); \ - tracing_record_cmdline(current); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_bprintk(ip, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(ip, fmt, ##args); \ -} while (0) #ifdef CONFIG_PERF_EVENTS struct perf_event; diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 1ef95c0287f0..a93ed5ac3226 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -88,8 +88,8 @@ extern __printf(2, 3) void trace_seq_printf(struct trace_seq *s, const char *fmt, ...); extern __printf(2, 0) void trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args); -extern void -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern __printf(2, 0) +void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int trace_print_seq(struct seq_file *m, struct trace_seq *s); extern int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt); @@ -113,8 +113,8 @@ static inline __printf(2, 3) void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) { } -static inline void -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +static inline __printf(2, 0) +void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { } diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 76d9055b2cff..826ce3f8e1f8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -218,7 +218,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DEFINE_RUST_DO_TRACE(name, proto, args) \ notrace void rust_do_trace_##name(proto) \ { \ - __rust_do_trace_##name(args); \ + __do_trace_##name(args); \ } /* @@ -268,7 +268,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ - static inline void __rust_do_trace_##name(proto) \ + static inline void __do_trace_##name(proto) \ { \ if (cond) { \ guard(preempt_notrace)(); \ @@ -277,12 +277,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) } \ static inline void trace_##name(proto) \ { \ - if (static_branch_unlikely(&__tracepoint_##name.key)) { \ - if (cond) { \ - guard(preempt_notrace)(); \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ - } \ - } \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ @@ -291,7 +287,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define __DECLARE_TRACE_SYSCALL(name, proto, args, data_proto) \ __DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \ - static inline void __rust_do_trace_##name(proto) \ + static inline void __do_trace_##name(proto) \ { \ guard(rcu_tasks_trace)(); \ __DO_TRACE_CALL(name, TP_ARGS(args)); \ @@ -299,10 +295,8 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) static inline void trace_##name(proto) \ { \ might_fault(); \ - if (static_branch_unlikely(&__tracepoint_##name.key)) { \ - guard(rcu_tasks_trace)(); \ - __DO_TRACE_CALL(name, TP_ARGS(args)); \ - } \ + if (static_branch_unlikely(&__tracepoint_##name.key)) \ + __do_trace_##name(args); \ if (IS_ENABLED(CONFIG_LOCKDEP)) { \ WARN_ONCE(!rcu_is_watching(), \ "RCU not watching for tracepoint"); \ @@ -470,16 +464,30 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #endif #define DECLARE_TRACE(name, proto, args) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + __DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_CONDITION(name, proto, args, cond) \ - __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + __DECLARE_TRACE(name##_tp, PARAMS(proto), PARAMS(args), \ cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ PARAMS(void *__data, proto)) #define DECLARE_TRACE_SYSCALL(name, proto, args) \ + __DECLARE_TRACE_SYSCALL(name##_tp, PARAMS(proto), PARAMS(args), \ + PARAMS(void *__data, proto)) + +#define DECLARE_TRACE_EVENT(name, proto, args) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto)) + +#define DECLARE_TRACE_EVENT_CONDITION(name, proto, args, cond) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()) && (PARAMS(cond)), \ + PARAMS(void *__data, proto)) + +#define DECLARE_TRACE_EVENT_SYSCALL(name, proto, args) \ __DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args), \ PARAMS(void *__data, proto)) @@ -597,32 +605,32 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_CONDITION(template, name, proto, \ args, cond) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ assign, print, reg, unreg) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, \ + DECLARE_TRACE_EVENT(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FN_COND(name, proto, args, cond, struct, \ assign, print, reg, unreg) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_CONDITION(name, proto, args, cond, \ struct, assign, print) \ - DECLARE_TRACE_CONDITION(name, PARAMS(proto), \ + DECLARE_TRACE_EVENT_CONDITION(name, PARAMS(proto), \ PARAMS(args), PARAMS(cond)) #define TRACE_EVENT_SYSCALL(name, proto, args, struct, assign, \ print, reg, unreg) \ - DECLARE_TRACE_SYSCALL(name, PARAMS(proto), PARAMS(args)) + DECLARE_TRACE_EVENT_SYSCALL(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FLAGS(event, flag) diff --git a/include/linux/tsm-mr.h b/include/linux/tsm-mr.h new file mode 100644 index 000000000000..50a521f4ac97 --- /dev/null +++ b/include/linux/tsm-mr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __TSM_MR_H +#define __TSM_MR_H + +#include <crypto/hash_info.h> + +/** + * struct tsm_measurement_register - describes an architectural measurement + * register (MR) + * @mr_name: name of the MR + * @mr_value: buffer containing the current value of the MR + * @mr_size: size of the MR - typically the digest size of @mr_hash + * @mr_flags: bitwise OR of one or more flags, detailed below + * @mr_hash: optional hash identifier defined in include/uapi/linux/hash_info.h. + * + * A CC guest driver encloses an array of this structure in struct + * tsm_measurements to detail the measurement facility supported by the + * underlying CC hardware. + * + * @mr_name and @mr_value must stay valid until this structure is no longer in + * use. + * + * @mr_flags is the bitwise-OR of zero or more of the flags below. + * + * * %TSM_MR_F_READABLE - the sysfs attribute corresponding to this MR is readable. + * * %TSM_MR_F_WRITABLE - the sysfs attribute corresponding to this MR is writable. + * The semantics is typically to extend the MR but could vary depending on the + * architecture and the MR. + * * %TSM_MR_F_LIVE - this MR's value may differ from the last value written, so + * must be read back from the underlying CC hardware/firmware. + * * %TSM_MR_F_RTMR - bitwise-OR of %TSM_MR_F_LIVE and %TSM_MR_F_WRITABLE. + * * %TSM_MR_F_NOHASH - this MR does NOT have an associated hash algorithm. + * @mr_hash will be ignored when this flag is set. + */ +struct tsm_measurement_register { + const char *mr_name; + void *mr_value; + u32 mr_size; + u32 mr_flags; + enum hash_algo mr_hash; +}; + +#define TSM_MR_F_NOHASH 1 +#define TSM_MR_F_WRITABLE 2 +#define TSM_MR_F_READABLE 4 +#define TSM_MR_F_LIVE 8 +#define TSM_MR_F_RTMR (TSM_MR_F_LIVE | TSM_MR_F_WRITABLE) + +#define TSM_MR_(mr, hash) \ + .mr_name = #mr, .mr_size = hash##_DIGEST_SIZE, \ + .mr_hash = HASH_ALGO_##hash, .mr_flags = TSM_MR_F_READABLE + +/** + * struct tsm_measurements - defines the CC architecture specific measurement + * facility and methods for updating measurement registers (MRs) + * @mrs: Array of MR definitions. + * @nr_mrs: Number of elements in @mrs. + * @refresh: Callback function to load/sync all MRs from TVM hardware/firmware + * into the kernel cache. + * @write: Callback function to write to the MR specified by the parameter @mr. + * Typically, writing to an MR extends the input buffer to that MR. + * + * The @refresh callback is invoked when an MR with %TSM_MR_F_LIVE set is being + * read and the cache is stale. It must reload all MRs with %TSM_MR_F_LIVE set. + * The function parameter @tm is a pointer pointing back to this structure. + * + * The @write callback is invoked whenever an MR is being written. It takes two + * additional parameters besides @tm: + * + * * @mr - points to the MR (an element of @tm->mrs) being written. + * * @data - contains the bytes to write and whose size is @mr->mr_size. + * + * Both @refresh and @write should return 0 on success and an appropriate error + * code on failure. + */ +struct tsm_measurements { + const struct tsm_measurement_register *mrs; + size_t nr_mrs; + int (*refresh)(const struct tsm_measurements *tm); + int (*write)(const struct tsm_measurements *tm, + const struct tsm_measurement_register *mr, const u8 *data); +}; + +const struct attribute_group * +tsm_mr_create_attribute_group(const struct tsm_measurements *tm); +void tsm_mr_free_attribute_group(const struct attribute_group *attr_grp); + +#endif diff --git a/include/linux/tsm.h b/include/linux/tsm.h index 11b0c525be30..431054810dca 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -6,17 +6,17 @@ #include <linux/types.h> #include <linux/uuid.h> -#define TSM_INBLOB_MAX 64 -#define TSM_OUTBLOB_MAX SZ_32K +#define TSM_REPORT_INBLOB_MAX 64 +#define TSM_REPORT_OUTBLOB_MAX SZ_32K /* * Privilege level is a nested permission concept to allow confidential * guests to partition address space, 4-levels are supported. */ -#define TSM_PRIVLEVEL_MAX 3 +#define TSM_REPORT_PRIVLEVEL_MAX 3 /** - * struct tsm_desc - option descriptor for generating tsm report blobs + * struct tsm_report_desc - option descriptor for generating tsm report blobs * @privlevel: optional privilege level to associate with @outblob * @inblob_len: sizeof @inblob * @inblob: arbitrary input data @@ -24,10 +24,10 @@ * @service_guid: optional service-provider service guid to attest * @service_manifest_version: optional service-provider service manifest version requested */ -struct tsm_desc { +struct tsm_report_desc { unsigned int privlevel; size_t inblob_len; - u8 inblob[TSM_INBLOB_MAX]; + u8 inblob[TSM_REPORT_INBLOB_MAX]; char *service_provider; guid_t service_guid; unsigned int service_manifest_version; @@ -44,7 +44,7 @@ struct tsm_desc { * @manifestblob: (optional) manifest data associated with the report */ struct tsm_report { - struct tsm_desc desc; + struct tsm_report_desc desc; size_t outblob_len; u8 *outblob; size_t auxblob_len; @@ -88,7 +88,7 @@ enum tsm_bin_attr_index { }; /** - * struct tsm_ops - attributes and operations for tsm instances + * struct tsm_report_ops - attributes and operations for tsm_report instances * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider * @privlevel_floor: convey base privlevel for nested scenarios * @report_new: Populate @report with the report blob and auxblob @@ -99,7 +99,7 @@ enum tsm_bin_attr_index { * Implementation specific ops, only one is expected to be registered at * a time i.e. only one of "sev-guest", "tdx-guest", etc. */ -struct tsm_ops { +struct tsm_report_ops { const char *name; unsigned int privlevel_floor; int (*report_new)(struct tsm_report *report, void *data); @@ -107,6 +107,6 @@ struct tsm_ops { bool (*report_bin_attr_visible)(int n); }; -int tsm_register(const struct tsm_ops *ops, void *priv); -int tsm_unregister(const struct tsm_ops *ops); +int tsm_report_register(const struct tsm_report_ops *ops, void *priv); +int tsm_report_unregister(const struct tsm_report_ops *ops); #endif /* __TSM_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index 2372f9357240..0a46e4054dec 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -239,7 +239,6 @@ struct tty_struct { struct list_head tty_files; -#define N_TTY_BUF_SIZE 4096 struct work_struct SAK_work; } __randomize_layout; @@ -251,7 +250,7 @@ struct tty_file_private { }; /** - * DOC: TTY Struct Flags + * enum tty_struct_flags - TTY Struct Flags * * These bits are used in the :c:member:`tty_struct.flags` field. * @@ -260,62 +259,64 @@ struct tty_file_private { * tty->write. Thus, you must use the inline functions set_bit() and * clear_bit() to make things atomic. * - * TTY_THROTTLED + * @TTY_THROTTLED: * Driver input is throttled. The ldisc should call * :c:member:`tty_driver.unthrottle()` in order to resume reception when * it is ready to process more data (at threshold min). * - * TTY_IO_ERROR + * @TTY_IO_ERROR: * If set, causes all subsequent userspace read/write calls on the tty to * fail, returning -%EIO. (May be no ldisc too.) * - * TTY_OTHER_CLOSED + * @TTY_OTHER_CLOSED: * Device is a pty and the other side has closed. * - * TTY_EXCLUSIVE + * @TTY_EXCLUSIVE: * Exclusive open mode (a single opener). * - * TTY_DO_WRITE_WAKEUP + * @TTY_DO_WRITE_WAKEUP: * If set, causes the driver to call the * :c:member:`tty_ldisc_ops.write_wakeup()` method in order to resume * transmission when it can accept more data to transmit. * - * TTY_LDISC_OPEN + * @TTY_LDISC_OPEN: * Indicates that a line discipline is open. For debugging purposes only. * - * TTY_PTY_LOCK + * @TTY_PTY_LOCK: * A flag private to pty code to implement %TIOCSPTLCK/%TIOCGPTLCK logic. * - * TTY_NO_WRITE_SPLIT + * @TTY_NO_WRITE_SPLIT: * Prevent driver from splitting up writes into smaller chunks (preserve * write boundaries to driver). * - * TTY_HUPPED + * @TTY_HUPPED: * The TTY was hung up. This is set post :c:member:`tty_driver.hangup()`. * - * TTY_HUPPING + * @TTY_HUPPING: * The TTY is in the process of hanging up to abort potential readers. * - * TTY_LDISC_CHANGING + * @TTY_LDISC_CHANGING: * Line discipline for this TTY is being changed. I/O should not block * when this is set. Use tty_io_nonblock() to check. * - * TTY_LDISC_HALTED + * @TTY_LDISC_HALTED: * Line discipline for this TTY was stopped. No work should be queued to * this ldisc. */ -#define TTY_THROTTLED 0 -#define TTY_IO_ERROR 1 -#define TTY_OTHER_CLOSED 2 -#define TTY_EXCLUSIVE 3 -#define TTY_DO_WRITE_WAKEUP 5 -#define TTY_LDISC_OPEN 11 -#define TTY_PTY_LOCK 16 -#define TTY_NO_WRITE_SPLIT 17 -#define TTY_HUPPED 18 -#define TTY_HUPPING 19 -#define TTY_LDISC_CHANGING 20 -#define TTY_LDISC_HALTED 22 +enum tty_struct_flags { + TTY_THROTTLED, + TTY_IO_ERROR, + TTY_OTHER_CLOSED, + TTY_EXCLUSIVE, + TTY_DO_WRITE_WAKEUP, + TTY_LDISC_OPEN, + TTY_PTY_LOCK, + TTY_NO_WRITE_SPLIT, + TTY_HUPPED, + TTY_HUPPING, + TTY_LDISC_CHANGING, + TTY_LDISC_HALTED, +}; static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) { diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index dd4b31ce6d5d..188ee9b768eb 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -17,6 +17,92 @@ struct serial_icounter_struct; struct serial_struct; /** + * enum tty_driver_flag -- TTY Driver Flags + * + * These are flags passed to tty_alloc_driver(). + * + * @TTY_DRIVER_INSTALLED: + * Whether this driver was succesfully installed. This is a tty internal + * flag. Do not touch. + * + * @TTY_DRIVER_RESET_TERMIOS: + * Requests the tty layer to reset the termios setting when the last + * process has closed the device. Used for PTYs, in particular. + * + * @TTY_DRIVER_REAL_RAW: + * Indicates that the driver will guarantee not to set any special + * character handling flags if this is set for the tty: + * + * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` + * + * That is, if there is no reason for the driver to + * send notifications of parity and break characters up to the line + * driver, it won't do so. This allows the line driver to optimize for + * this case if this flag is set. (Note that there is also a promise, if + * the above case is true, not to signal overruns, either.) + * + * @TTY_DRIVER_DYNAMIC_DEV: + * The individual tty devices need to be registered with a call to + * tty_register_device() when the device is found in the system and + * unregistered with a call to tty_unregister_device() so the devices will + * be show up properly in sysfs. If not set, all &tty_driver.num entries + * will be created by the tty core in sysfs when tty_register_driver() is + * called. This is to be used by drivers that have tty devices that can + * appear and disappear while the main tty driver is registered with the + * tty core. + * + * @TTY_DRIVER_DEVPTS_MEM: + * Don't use the standard arrays (&tty_driver.ttys and + * &tty_driver.termios), instead use dynamic memory keyed through the + * devpts filesystem. This is only applicable to the PTY driver. + * + * @TTY_DRIVER_HARDWARE_BREAK: + * Hardware handles break signals. Pass the requested timeout to the + * &tty_operations.break_ctl instead of using a simple on/off interface. + * + * @TTY_DRIVER_DYNAMIC_ALLOC: + * Do not allocate structures which are needed per line for this driver + * (&tty_driver.ports) as it would waste memory. The driver will take + * care. This is only applicable to the PTY driver. + * + * @TTY_DRIVER_UNNUMBERED_NODE: + * Do not create numbered ``/dev`` nodes. For example, create + * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a + * driver for a single tty device is being allocated. + */ +enum tty_driver_flag { + TTY_DRIVER_INSTALLED = BIT(0), + TTY_DRIVER_RESET_TERMIOS = BIT(1), + TTY_DRIVER_REAL_RAW = BIT(2), + TTY_DRIVER_DYNAMIC_DEV = BIT(3), + TTY_DRIVER_DEVPTS_MEM = BIT(4), + TTY_DRIVER_HARDWARE_BREAK = BIT(5), + TTY_DRIVER_DYNAMIC_ALLOC = BIT(6), + TTY_DRIVER_UNNUMBERED_NODE = BIT(7), +}; + +enum tty_driver_type { + TTY_DRIVER_TYPE_SYSTEM, + TTY_DRIVER_TYPE_CONSOLE, + TTY_DRIVER_TYPE_SERIAL, + TTY_DRIVER_TYPE_PTY, + TTY_DRIVER_TYPE_SCC, + TTY_DRIVER_TYPE_SYSCONS, +}; + +enum tty_driver_subtype { + SYSTEM_TYPE_TTY = 1, + SYSTEM_TYPE_CONSOLE, + SYSTEM_TYPE_SYSCONS, + SYSTEM_TYPE_SYSPTMX, + + PTY_TYPE_MASTER = 1, + PTY_TYPE_SLAVE, + + SERIAL_TYPE_NORMAL = 1, +}; + +/** * struct tty_operations -- interface between driver and tty * * @lookup: ``struct tty_struct *()(struct tty_driver *self, struct file *, @@ -320,7 +406,7 @@ struct serial_struct; * * @poll_init: ``int ()(struct tty_driver *driver, int line, char *options)`` * - * kgdboc support (Documentation/dev-tools/kgdb.rst). This routine is + * kgdboc support (Documentation/process/debugging/kgdb.rst). This routine is * called to initialize the HW for later use by calling @poll_get_char or * @poll_put_char. * @@ -414,8 +500,8 @@ struct tty_operations { * @major: major /dev device number (zero for autoassignment) * @minor_start: the first minor /dev device number * @num: number of devices allocated - * @type: type of tty driver (%TTY_DRIVER_TYPE_) - * @subtype: subtype of tty driver (%SYSTEM_TYPE_, %PTY_TYPE_, %SERIAL_TYPE_) + * @type: type of tty driver (enum tty_driver_type) + * @subtype: subtype of tty driver (enum tty_driver_subtype) * @init_termios: termios to set to each tty initially (e.g. %tty_std_termios) * @flags: tty driver flags (%TTY_DRIVER_) * @proc_entry: proc fs entry, used internally @@ -447,8 +533,8 @@ struct tty_driver { int major; int minor_start; unsigned int num; - short type; - short subtype; + enum tty_driver_type type; + enum tty_driver_subtype subtype; struct ktermios init_termios; unsigned long flags; struct proc_dir_entry *proc_entry; @@ -478,7 +564,13 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line); void tty_driver_kref_put(struct tty_driver *driver); -/* Use TTY_DRIVER_* flags below */ +/** + * tty_alloc_driver - allocate tty driver + * @lines: count of lines this driver can handle at most + * @flags: some of enum tty_driver_flag, will be set in driver->flags + * + * Returns: struct tty_driver or a PTR-encoded error (use IS_ERR() and friends). + */ #define tty_alloc_driver(lines, flags) \ __tty_alloc_driver(lines, THIS_MODULE, flags) @@ -494,84 +586,6 @@ static inline void tty_set_operations(struct tty_driver *driver, driver->ops = op; } -/** - * DOC: TTY Driver Flags - * - * TTY_DRIVER_RESET_TERMIOS - * Requests the tty layer to reset the termios setting when the last - * process has closed the device. Used for PTYs, in particular. - * - * TTY_DRIVER_REAL_RAW - * Indicates that the driver will guarantee not to set any special - * character handling flags if this is set for the tty: - * - * ``(IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR || !INPCK)`` - * - * That is, if there is no reason for the driver to - * send notifications of parity and break characters up to the line - * driver, it won't do so. This allows the line driver to optimize for - * this case if this flag is set. (Note that there is also a promise, if - * the above case is true, not to signal overruns, either.) - * - * TTY_DRIVER_DYNAMIC_DEV - * The individual tty devices need to be registered with a call to - * tty_register_device() when the device is found in the system and - * unregistered with a call to tty_unregister_device() so the devices will - * be show up properly in sysfs. If not set, all &tty_driver.num entries - * will be created by the tty core in sysfs when tty_register_driver() is - * called. This is to be used by drivers that have tty devices that can - * appear and disappear while the main tty driver is registered with the - * tty core. - * - * TTY_DRIVER_DEVPTS_MEM - * Don't use the standard arrays (&tty_driver.ttys and - * &tty_driver.termios), instead use dynamic memory keyed through the - * devpts filesystem. This is only applicable to the PTY driver. - * - * TTY_DRIVER_HARDWARE_BREAK - * Hardware handles break signals. Pass the requested timeout to the - * &tty_operations.break_ctl instead of using a simple on/off interface. - * - * TTY_DRIVER_DYNAMIC_ALLOC - * Do not allocate structures which are needed per line for this driver - * (&tty_driver.ports) as it would waste memory. The driver will take - * care. This is only applicable to the PTY driver. - * - * TTY_DRIVER_UNNUMBERED_NODE - * Do not create numbered ``/dev`` nodes. For example, create - * ``/dev/ttyprintk`` and not ``/dev/ttyprintk0``. Applicable only when a - * driver for a single tty device is being allocated. - */ -#define TTY_DRIVER_INSTALLED 0x0001 -#define TTY_DRIVER_RESET_TERMIOS 0x0002 -#define TTY_DRIVER_REAL_RAW 0x0004 -#define TTY_DRIVER_DYNAMIC_DEV 0x0008 -#define TTY_DRIVER_DEVPTS_MEM 0x0010 -#define TTY_DRIVER_HARDWARE_BREAK 0x0020 -#define TTY_DRIVER_DYNAMIC_ALLOC 0x0040 -#define TTY_DRIVER_UNNUMBERED_NODE 0x0080 - -/* tty driver types */ -#define TTY_DRIVER_TYPE_SYSTEM 0x0001 -#define TTY_DRIVER_TYPE_CONSOLE 0x0002 -#define TTY_DRIVER_TYPE_SERIAL 0x0003 -#define TTY_DRIVER_TYPE_PTY 0x0004 -#define TTY_DRIVER_TYPE_SCC 0x0005 /* scc driver */ -#define TTY_DRIVER_TYPE_SYSCONS 0x0006 - -/* system subtypes (magic, used by tty_io.c) */ -#define SYSTEM_TYPE_TTY 0x0001 -#define SYSTEM_TYPE_CONSOLE 0x0002 -#define SYSTEM_TYPE_SYSCONS 0x0003 -#define SYSTEM_TYPE_SYSPTMX 0x0004 - -/* pty subtypes (magic, used by tty_io.c) */ -#define PTY_TYPE_MASTER 0x0001 -#define PTY_TYPE_SLAVE 0x0002 - -/* serial subtype definitions */ -#define SERIAL_TYPE_NORMAL 1 - int tty_register_driver(struct tty_driver *driver); void tty_unregister_driver(struct tty_driver *driver); struct device *tty_register_device(struct tty_driver *driver, unsigned index, diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index af01e89074b2..c5cccc3fc1e8 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -39,7 +39,6 @@ do { \ int ldsem_down_read(struct ld_semaphore *sem, long timeout); int ldsem_down_read_trylock(struct ld_semaphore *sem); int ldsem_down_write(struct ld_semaphore *sem, long timeout); -int ldsem_down_write_trylock(struct ld_semaphore *sem); void ldsem_up_read(struct ld_semaphore *sem); void ldsem_up_write(struct ld_semaphore *sem); diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index 1b861f2100b6..08f89a598366 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -147,9 +147,6 @@ struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); -struct device *tty_port_register_device_serdev(struct tty_port *port, - struct tty_driver *driver, unsigned index, - struct device *host, struct device *parent); struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent, void *drvdata, diff --git a/include/linux/turris-omnia-mcu-interface.h b/include/linux/turris-omnia-mcu-interface.h index 2da8cbeb158a..38b45ab00053 100644 --- a/include/linux/turris-omnia-mcu-interface.h +++ b/include/linux/turris-omnia-mcu-interface.h @@ -9,7 +9,10 @@ #define __TURRIS_OMNIA_MCU_INTERFACE_H #include <linux/bitfield.h> -#include <linux/bits.h> +#include <linux/bitops.h> +#include <linux/types.h> +#include <linux/unaligned.h> +#include <asm/byteorder.h> enum omnia_commands_e { OMNIA_CMD_GET_STATUS_WORD = 0x01, /* slave sends status word back */ @@ -236,6 +239,20 @@ enum omnia_int_e { OMNIA_INT_LAN5_LED1 = BIT(31), }; +enum omnia_cmd_led_mode_e { + OMNIA_CMD_LED_MODE_LED_MASK = GENMASK(3, 0), + OMNIA_CMD_LED_MODE_USER = BIT(4), +}; + +#define OMNIA_CMD_LED_MODE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_MODE_LED_MASK, _l) + +enum omnia_cmd_led_state_e { + OMNIA_CMD_LED_STATE_LED_MASK = GENMASK(3, 0), + OMNIA_CMD_LED_STATE_ON = BIT(4), +}; + +#define OMNIA_CMD_LED_STATE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_STATE_LED_MASK, _l) + enum omnia_cmd_poweroff_e { OMNIA_CMD_POWER_OFF_POWERON_BUTTON = BIT(0), OMNIA_CMD_POWER_OFF_MAGIC = 0xdead, @@ -246,4 +263,135 @@ enum omnia_cmd_usb_ovc_prot_e { OMNIA_CMD_xET_USB_OVC_PROT_ENABLE = BIT(4), }; +/* Command execution functions */ + +struct i2c_client; + +int omnia_cmd_write_read(const struct i2c_client *client, + void *cmd, unsigned int cmd_len, + void *reply, unsigned int reply_len); + +static inline int omnia_cmd_write(const struct i2c_client *client, void *cmd, + unsigned int len) +{ + return omnia_cmd_write_read(client, cmd, len, NULL, 0); +} + +static inline int omnia_cmd_write_u8(const struct i2c_client *client, u8 cmd, + u8 val) +{ + u8 buf[2] = { cmd, val }; + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_write_u16(const struct i2c_client *client, u8 cmd, + u16 val) +{ + u8 buf[3]; + + buf[0] = cmd; + put_unaligned_le16(val, &buf[1]); + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_write_u32(const struct i2c_client *client, u8 cmd, + u32 val) +{ + u8 buf[5]; + + buf[0] = cmd; + put_unaligned_le32(val, &buf[1]); + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_read(const struct i2c_client *client, u8 cmd, + void *reply, unsigned int len) +{ + return omnia_cmd_write_read(client, &cmd, 1, reply, len); +} + +static inline unsigned int +omnia_compute_reply_length(unsigned long mask, bool interleaved, + unsigned int offset) +{ + if (!mask) + return 0; + + return ((__fls(mask) >> 3) << interleaved) + 1 + offset; +} + +/* Returns 0 on success */ +static inline int omnia_cmd_read_bits(const struct i2c_client *client, u8 cmd, + unsigned long bits, unsigned long *dst) +{ + __le32 reply; + int err; + + if (!bits) { + *dst = 0; + return 0; + } + + err = omnia_cmd_read(client, cmd, &reply, + omnia_compute_reply_length(bits, false, 0)); + if (err) + return err; + + *dst = le32_to_cpu(reply) & bits; + + return 0; +} + +static inline int omnia_cmd_read_bit(const struct i2c_client *client, u8 cmd, + unsigned long bit) +{ + unsigned long reply; + int err; + + err = omnia_cmd_read_bits(client, cmd, bit, &reply); + if (err) + return err; + + return !!reply; +} + +static inline int omnia_cmd_read_u32(const struct i2c_client *client, u8 cmd, + u32 *dst) +{ + __le32 reply; + int err; + + err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); + if (err) + return err; + + *dst = le32_to_cpu(reply); + + return 0; +} + +static inline int omnia_cmd_read_u16(const struct i2c_client *client, u8 cmd, + u16 *dst) +{ + __le16 reply; + int err; + + err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); + if (err) + return err; + + *dst = le16_to_cpu(reply); + + return 0; +} + +static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, + u8 *reply) +{ + return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); +} + #endif /* __TURRIS_OMNIA_MCU_INTERFACE_H */ diff --git a/include/linux/turris-signing-key.h b/include/linux/turris-signing-key.h new file mode 100644 index 000000000000..8a435b73c3a9 --- /dev/null +++ b/include/linux/turris-signing-key.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * 2025 by Marek Behún <kabel@kernel.org> + */ + +#ifndef __TURRIS_SIGNING_KEY_H +#define __TURRIS_SIGNING_KEY_H + +#include <linux/key.h> +#include <linux/types.h> + +struct device; + +#ifdef CONFIG_KEYS +struct turris_signing_key_subtype { + u16 key_size; + u8 data_size; + u8 sig_size; + u8 public_key_size; + const char *hash_algo; + const void *(*get_public_key)(const struct key *key); + int (*sign)(const struct key *key, const void *msg, void *signature); +}; + +static inline struct device *turris_signing_key_get_dev(const struct key *key) +{ + return key->payload.data[1]; +} + +int +devm_turris_signing_key_create(struct device *dev, const struct turris_signing_key_subtype *subtype, + const char *desc); +#endif + +#endif /* __TURRIS_SIGNING_KEY_H */ diff --git a/include/linux/types.h b/include/linux/types.h index 2d7b9ae8714c..6dfdb8e8e4c3 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -43,7 +43,7 @@ typedef unsigned long uintptr_t; typedef long intptr_t; #ifdef CONFIG_HAVE_UID16 -/* This is defined by include/asm-{arch}/posix_types.h */ +/* This is defined by arch/{arch}/include/asm/posix_types.h */ typedef __kernel_old_uid_t old_uid_t; typedef __kernel_old_gid_t old_gid_t; #endif /* CONFIG_UID16 */ @@ -92,6 +92,7 @@ typedef unsigned char unchar; typedef unsigned short ushort; typedef unsigned int uint; typedef unsigned long ulong; +typedef unsigned long long ullong; #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ @@ -135,6 +136,10 @@ typedef s64 ktime_t; typedef u64 sector_t; typedef u64 blkcnt_t; +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + /* * The type of an index into the pagecache. */ @@ -248,5 +253,17 @@ typedef void (*swap_func_t)(void *a, void *b, int size); typedef int (*cmp_r_func_t)(const void *a, const void *b, const void *priv); typedef int (*cmp_func_t)(const void *a, const void *b); +/* + * rcuwait provides a way of blocking and waking up a single + * task in an rcu-safe manner. + * + * The only time @task is non-nil is when a user is blocked (or + * checking if it needs to) on a condition, and reset as soon as we + * know that the condition has succeeded and are awoken. + */ +struct rcuwait { + struct task_struct __rcu *task; +}; + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index e9c702c1908d..7c06f4795670 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -7,7 +7,7 @@ #include <linux/minmax.h> #include <linux/nospec.h> #include <linux/sched.h> -#include <linux/thread_info.h> +#include <linux/ucopysize.h> #include <asm/uaccess.h> diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h index d8219cbe09ff..3ab8d38aedb8 100644 --- a/include/linux/ubsan.h +++ b/include/linux/ubsan.h @@ -2,10 +2,10 @@ #ifndef _LINUX_UBSAN_H #define _LINUX_UBSAN_H -#ifdef CONFIG_UBSAN_TRAP -const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type); +#if defined(CONFIG_UBSAN_TRAP) || defined(CONFIG_UBSAN_KVM_EL2) +const char *report_ubsan_failure(u32 check_type); #else -static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) +static inline const char *report_ubsan_failure(u32 check_type) { return NULL; } diff --git a/include/linux/ucopysize.h b/include/linux/ucopysize.h new file mode 100644 index 000000000000..41c2d9720466 --- /dev/null +++ b/include/linux/ucopysize.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Perform sanity checking for object sizes for uaccess.h and uio.h. */ +#ifndef __LINUX_UCOPYSIZE_H__ +#define __LINUX_UCOPYSIZE_H__ + +#include <linux/bug.h> + +#ifdef CONFIG_HARDENED_USERCOPY +#include <linux/jump_label.h> +extern void __check_object_size(const void *ptr, unsigned long n, + bool to_user); + +DECLARE_STATIC_KEY_MAYBE(CONFIG_HARDENED_USERCOPY_DEFAULT_ON, + validate_usercopy_range); + +static __always_inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ + if (!__builtin_constant_p(n) && + static_branch_maybe(CONFIG_HARDENED_USERCOPY_DEFAULT_ON, + &validate_usercopy_range)) { + __check_object_size(ptr, n, to_user); + } +} +#else +static inline void check_object_size(const void *ptr, unsigned long n, + bool to_user) +{ } +#endif /* CONFIG_HARDENED_USERCOPY */ + +extern void __compiletime_error("copy source size is too small") +__bad_copy_from(void); +extern void __compiletime_error("copy destination size is too small") +__bad_copy_to(void); + +void __copy_overflow(int size, unsigned long count); + +static inline void copy_overflow(int size, unsigned long count) +{ + if (IS_ENABLED(CONFIG_BUG)) + __copy_overflow(size, count); +} + +static __always_inline __must_check bool +check_copy_size(const void *addr, size_t bytes, bool is_source) +{ + int sz = __builtin_object_size(addr, 0); + if (unlikely(sz >= 0 && sz < bytes)) { + if (!__builtin_constant_p(bytes)) + copy_overflow(sz, bytes); + else if (is_source) + __bad_copy_from(); + else + __bad_copy_to(); + return false; + } + if (WARN_ON_ONCE(bytes > INT_MAX)) + return false; + check_object_size(addr, bytes, is_source); + return true; +} + +#endif /* __LINUX_UCOPYSIZE_H__ */ diff --git a/include/linux/udp.h b/include/linux/udp.h index 0807e21cfec9..4e1a672af4c5 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -101,6 +101,13 @@ struct udp_sock { /* Cache friendly copy of sk->sk_peek_off >= 0 */ bool peeking_with_offset; + + /* + * Accounting for the tunnel GRO fastpath. + * Unprotected by compilers guard, as it uses space available in + * the last UDP socket cacheline. + */ + struct hlist_node tunnel_list; }; #define udp_test_bit(nr, sk) \ @@ -209,6 +216,9 @@ static inline void udp_allow_gso(struct sock *sk) #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) +#define udp_portaddr_for_each_entry_from(__sk) \ + hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node) + #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) @@ -219,4 +229,13 @@ static inline void udp_allow_gso(struct sock *sk) #define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) +static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6) +{ +#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL) + return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk); +#else + return NULL; +#endif +} + #endif /* _LINUX_UDP_H */ diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index f85ec5613721..2dc767e08f54 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -132,6 +132,7 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) u32 map_id_down(struct uid_gid_map *map, u32 id); u32 map_id_up(struct uid_gid_map *map, u32 id); +u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count); #else @@ -186,6 +187,11 @@ static inline u32 map_id_down(struct uid_gid_map *map, u32 id) return id; } +static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) +{ + return id; +} + static inline u32 map_id_up(struct uid_gid_map *map, u32 id) { return id; diff --git a/include/linux/uio.h b/include/linux/uio.h index 853f9de5aa05..2e86c653186c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -6,8 +6,8 @@ #define __LINUX_UIO_H #include <linux/kernel.h> -#include <linux/thread_info.h> #include <linux/mm_types.h> +#include <linux/ucopysize.h> #include <uapi/linux/uio.h> struct page; @@ -82,6 +82,15 @@ struct iov_iter { }; }; +typedef __u16 uio_meta_flags_t; + +struct uio_meta { + uio_meta_flags_t flags; + u16 app_tag; + u64 seed; + struct iov_iter iter; +}; + static inline const struct iovec *iter_iov(const struct iov_iter *iter) { if (iter->iter_type == ITER_UBUF) @@ -90,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter) } #define iter_iov_addr(iter) (iter_iov(iter)->iov_base + (iter)->iov_offset) -#define iter_iov_len(iter) (iter_iov(iter)->iov_len - (iter)->iov_offset) + +static inline size_t iter_iov_len(const struct iov_iter *i) +{ + if (i->iter_type == ITER_UBUF) + return i->count; + return iter_iov(i)->iov_len - i->iov_offset; +} static inline enum iter_type iov_iter_type(const struct iov_iter *i) { @@ -167,8 +182,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } -size_t copy_page_from_iter_atomic(struct page *page, size_t offset, - size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes); @@ -178,6 +191,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); +size_t copy_folio_from_iter_atomic(struct folio *folio, size_t offset, + size_t bytes, struct iov_iter *i); size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); @@ -195,12 +210,6 @@ static inline size_t copy_folio_from_iter(struct folio *folio, size_t offset, return copy_page_from_iter(&folio->page, offset, bytes, i); } -static inline size_t copy_folio_from_iter_atomic(struct folio *folio, - size_t offset, size_t bytes, struct iov_iter *i) -{ - return copy_page_from_iter_atomic(&folio->page, offset, bytes, i); -} - size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); diff --git a/include/linux/unroll.h b/include/linux/unroll.h index d42fd6366373..186b71de740f 100644 --- a/include/linux/unroll.h +++ b/include/linux/unroll.h @@ -9,6 +9,48 @@ #include <linux/args.h> +#ifdef CONFIG_CC_IS_CLANG +#define __pick_unrolled(x, y) _Pragma(#x) +#else +#define __pick_unrolled(x, y) _Pragma(#y) +#endif + +/** + * unrolled - loop attributes to ask the compiler to unroll it + * + * Usage: + * + * #define BATCH 8 + * + * unrolled_count(BATCH) + * for (u32 i = 0; i < BATCH; i++) + * // loop body without cross-iteration dependencies + * + * This is only a hint and the compiler is free to disable unrolling if it + * thinks the count is suboptimal and may hurt performance and/or hugely + * increase object code size. + * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends + * on what iter x will do with variables) is not a strict requirement, but + * provides best performance and object code size. + * Available only on Clang and GCC 8.x onwards. + */ + +/* Ask the compiler to pick an optimal unroll count, Clang only */ +#define unrolled \ + __pick_unrolled(clang loop unroll(enable), /* nothing */) + +/* Unroll each @n iterations of the loop */ +#define unrolled_count(n) \ + __pick_unrolled(clang loop unroll_count(n), GCC unroll n) + +/* Unroll the whole loop */ +#define unrolled_full \ + __pick_unrolled(clang loop unroll(full), GCC unroll 65534) + +/* Never unroll the loop */ +#define unrolled_none \ + __pick_unrolled(clang loop unroll(disable), GCC unroll 1) + #define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args) #define __UNROLL_0(MACRO, args...) diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e0a4c2082245..516217c39094 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/timer.h> +#include <linux/seqlock.h> struct uprobe; struct vm_area_struct; @@ -38,6 +39,8 @@ struct page; #define MAX_URETPROBE_DEPTH 64 +#define UPROBE_NO_TRAMPOLINE_VADDR (~0UL) + struct uprobe_consumer { /* * handler() can return UPROBE_HANDLER_REMOVE to signal the need to @@ -124,6 +127,10 @@ struct uprobe_task { unsigned int depth; struct return_instance *return_instances; + struct return_instance *ri_pool; + struct timer_list ri_timer; + seqcount_t ri_seqcount; + union { struct { struct arch_uprobe_task autask; @@ -137,8 +144,8 @@ struct uprobe_task { }; struct uprobe *active_uprobe; - struct timer_list ri_timer; unsigned long xol_vaddr; + bool signal_denied; struct arch_uprobe *auprobe; }; @@ -154,12 +161,18 @@ struct return_instance { unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ - int consumers_cnt; + int cons_cnt; /* total number of session consumers */ struct return_instance *next; /* keep as stack */ struct rcu_head rcu; - struct return_consumer consumers[] __counted_by(consumers_cnt); + /* singular pre-allocated return_consumer instance for common case */ + struct return_consumer consumer; + /* + * extra return_consumer instances for rare cases of multiple session consumers, + * contains (cons_cnt - 1) elements + */ + struct return_consumer *extra_consumers; } ____cacheline_aligned; enum rp_check { @@ -175,13 +188,13 @@ struct uprobes_state { }; extern void __init uprobes_init(void); -extern int set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); +extern int set_swbp(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); +extern int set_orig_insn(struct arch_uprobe *aup, struct vm_area_struct *vma, unsigned long vaddr); extern bool is_swbp_insn(uprobe_opcode_t *insn); extern bool is_trap_insn(uprobe_opcode_t *insn); extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs); extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); -extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); +extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma, unsigned long vaddr, uprobe_opcode_t); extern struct uprobe *uprobe_register(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool); extern void uprobe_unregister_nosync(struct uprobe *uprobe, struct uprobe_consumer *uc); diff --git a/include/linux/usb.h b/include/linux/usb.h index cfa8005e24f9..92c752f5446f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -51,6 +51,7 @@ struct ep_device; * @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder * @ss_ep_comp: SuperSpeed companion descriptor for this endpoint * @ssp_isoc_ep_comp: SuperSpeedPlus isoc companion descriptor for this endpoint + * @eusb2_isoc_ep_comp: eUSB2 isoc companion descriptor for this endpoint * @urb_list: urbs queued to this endpoint; maintained by usbcore * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH) * with one or more transfer descriptors (TDs) per urb @@ -64,9 +65,10 @@ struct ep_device; * descriptor within an active interface in a given USB configuration. */ struct usb_host_endpoint { - struct usb_endpoint_descriptor desc; - struct usb_ss_ep_comp_descriptor ss_ep_comp; - struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; + struct usb_endpoint_descriptor desc; + struct usb_ss_ep_comp_descriptor ss_ep_comp; + struct usb_ssp_isoc_ep_comp_descriptor ssp_isoc_ep_comp; + struct usb_eusb2_isoc_ep_comp_descriptor eusb2_isoc_ep_comp; struct list_head urb_list; void *hcpriv; struct ep_device *ep_dev; /* For sysfs info */ @@ -612,6 +614,7 @@ struct usb3_lpm_parameters { * FIXME -- complete doc * @authenticated: Crypto authentication passed * @tunnel_mode: Connection native or tunneled over USB4 + * @usb4_link: device link to the USB4 host interface * @lpm_capable: device supports LPM * @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range * @usb2_hw_lpm_capable: device can perform USB2 hardware LPM @@ -722,6 +725,7 @@ struct usb_device { unsigned reset_resume:1; unsigned port_is_suspended:1; enum usb_link_tunnel_mode tunnel_mode; + struct device_link *usb4_link; int slot_id; struct usb2_lpm_parameters l1_params; @@ -813,10 +817,10 @@ static inline void usb_mark_last_busy(struct usb_device *udev) #else -static inline int usb_enable_autosuspend(struct usb_device *udev) -{ return 0; } -static inline int usb_disable_autosuspend(struct usb_device *udev) -{ return 0; } +static inline void usb_enable_autosuspend(struct usb_device *udev) +{ } +static inline void usb_disable_autosuspend(struct usb_device *udev) +{ } static inline int usb_autopm_get_interface(struct usb_interface *intf) { return 0; } diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 2d207cb4837d..4ac082a63173 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -119,6 +119,7 @@ struct cdc_ncm_ctx { u32 timer_interval; u32 max_ndp_size; u8 is_ndp16; + u8 filtering_supported; union { struct usb_cdc_ncm_ndp16 *delayed_ndp16; struct usb_cdc_ncm_ndp32 *delayed_ndp32; diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 6e38fb9d2117..d8c4e9f73839 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -237,7 +237,7 @@ struct usb_function { /* internals */ struct list_head list; DECLARE_BITMAP(endpoints, 32); - const struct usb_function_instance *fi; + struct usb_function_instance *fi; unsigned int bind_deactivated:1; }; diff --git a/include/linux/usb/mctp-usb.h b/include/linux/usb/mctp-usb.h new file mode 100644 index 000000000000..a2f6f1e04efb --- /dev/null +++ b/include/linux/usb/mctp-usb.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mctp-usb.h - MCTP USB transport binding: common definitions, + * based on DMTF0283 specification: + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0283_1.0.1.pdf + * + * These are protocol-level definitions, that may be shared between host + * and gadget drivers. + * + * Copyright (C) 2024-2025 Code Construct Pty Ltd + */ + +#ifndef __LINUX_USB_MCTP_USB_H +#define __LINUX_USB_MCTP_USB_H + +#include <linux/types.h> + +struct mctp_usb_hdr { + __be16 id; + u8 rsvd; + u8 len; +} __packed; + +#define MCTP_USB_XFER_SIZE 512 +#define MCTP_USB_BTU 68 +#define MCTP_USB_MTU_MIN MCTP_USB_BTU +#define MCTP_USB_MTU_MAX (U8_MAX - sizeof(struct mctp_usb_hdr)) +#define MCTP_USB_DMTF_ID 0x1ab4 + +#endif /* __LINUX_USB_MCTP_USB_H */ diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 3963e55e88a3..fbdef950f06c 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -61,7 +61,7 @@ struct musb_hdrc_eps_bits { }; struct musb_hdrc_config { - struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */ + const struct musb_fifo_cfg *fifo_cfg; /* board fifo configuration */ unsigned fifo_cfg_size; /* size of the fifo configuration */ /* MUSB configuration-specific details */ diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h index d50098fb16b5..3068c3084eb6 100644 --- a/include/linux/usb/pd.h +++ b/include/linux/usb/pd.h @@ -33,7 +33,9 @@ enum pd_ctrl_msg_type { PD_CTRL_FR_SWAP = 19, PD_CTRL_GET_PPS_STATUS = 20, PD_CTRL_GET_COUNTRY_CODES = 21, - /* 22-31 Reserved */ + /* 22-23 Reserved */ + PD_CTRL_GET_REVISION = 24, + /* 25-31 Reserved */ }; enum pd_data_msg_type { @@ -46,7 +48,9 @@ enum pd_data_msg_type { PD_DATA_ALERT = 6, PD_DATA_GET_COUNTRY_INFO = 7, PD_DATA_ENTER_USB = 8, - /* 9-14 Reserved */ + /* 9-11 Reserved */ + PD_DATA_REVISION = 12, + /* 13-14 Reserved */ PD_DATA_VENDOR_DEF = 15, /* 16-31 Reserved */ }; @@ -453,6 +457,20 @@ static inline unsigned int rdo_max_power(u32 rdo) #define EUDO_TBT_SUPPORT BIT(14) #define EUDO_HOST_PRESENT BIT(13) +/* + * Request Message Data Object (PD Revision 3.1+ only) + * -------- + * <31:28> :: Revision Major + * <27:24> :: Revision Minor + * <23:20> :: Version Major + * <19:16> :: Version Minor + * <15:0> :: Reserved, Shall be set to zero + */ + +#define RMDO(rev_maj, rev_min, ver_maj, ver_min) \ + (((rev_maj) & 0xf) << 28 | ((rev_min) & 0xf) << 24 | \ + ((ver_maj) & 0xf) << 20 | ((ver_min) & 0xf) << 16) + /* USB PD timers and counters */ #define PD_T_NO_RESPONSE 5000 /* 4.5 - 5.5 seconds */ #define PD_T_DB_DETECT 10000 /* 10 - 15 seconds */ diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index e4de6bc1f69b..0fa9885a1038 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -223,7 +223,6 @@ extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev, extern struct usb_phy *devm_usb_get_phy_by_node(struct device *dev, struct device_node *node, struct notifier_block *nb); extern void usb_put_phy(struct usb_phy *); -extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x); extern void usb_phy_set_event(struct usb_phy *x, unsigned long event); extern void usb_phy_set_charger_current(struct usb_phy *usb_phy, unsigned int mA); @@ -259,10 +258,6 @@ static inline void usb_put_phy(struct usb_phy *x) { } -static inline void devm_usb_put_phy(struct device *dev, struct usb_phy *x) -{ -} - static inline void usb_phy_set_event(struct usb_phy *x, unsigned long event) { } diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 33a4c146dc19..2ca60828f28b 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -30,6 +30,7 @@ #define VENDOR_ID_NVIDIA 0x0955 #define VENDOR_ID_TPLINK 0x2357 #define VENDOR_ID_DLINK 0x2001 +#define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 #if IS_REACHABLE(CONFIG_USB_RTL8152) diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index 8539956bc2be..51be3bb8fccb 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -82,4 +82,12 @@ struct bulk_cs_wrap { #define US_BULK_RESET_REQUEST 0xff #define US_BULK_GET_MAX_LUN 0xfe +/* + * If 4 LUNs are supported then the LUNs would be + * numbered from 0 to 3, and the return value for + * US_BULK_GET_MAX_LUN request would be 3. The valid + * LUN field is 4 bits wide, the upper limit is 0x0f. + */ +#define US_BULK_MAX_LUN_LIMIT 0x0f + #endif diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index d616b8807000..252af3f77039 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -140,6 +140,7 @@ int typec_cable_set_identity(struct typec_cable *cable); * @mode: Index of the Mode * @vdo: VDO returned by Discover Modes USB PD command * @roles: Only for ports. DRP if the mode is available in both roles + * @inactive: Only for ports. Make this port inactive (default is active). * * Description of an Alternate Mode which a connector, cable plug or partner * supports. @@ -150,6 +151,7 @@ struct typec_altmode_desc { u32 vdo; /* Only used with ports */ enum typec_port_data roles; + bool inactive; }; void typec_partner_set_pd_revision(struct typec_partner *partner, u16 pd_revision); diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h index f2da264d9c14..acb0ad03bdac 100644 --- a/include/linux/usb/typec_dp.h +++ b/include/linux/usb/typec_dp.h @@ -57,6 +57,7 @@ enum { DP_PIN_ASSIGN_D, DP_PIN_ASSIGN_E, DP_PIN_ASSIGN_F, /* Not supported after v1.0b */ + DP_PIN_ASSIGN_MAX, }; /* DisplayPort alt mode specific commands */ diff --git a/include/linux/usb/typec_tbt.h b/include/linux/usb/typec_tbt.h index fa97d7e00f5c..55dcea12082c 100644 --- a/include/linux/usb/typec_tbt.h +++ b/include/linux/usb/typec_tbt.h @@ -44,6 +44,7 @@ struct typec_thunderbolt_data { #define TBT_GEN3_NON_ROUNDED 0 #define TBT_GEN3_GEN4_ROUNDED_NON_ROUNDED 1 +#define TBT_CABLE_ROUNDED BIT(19) #define TBT_CABLE_OPTICAL BIT(21) #define TBT_CABLE_RETIMER BIT(22) #define TBT_CABLE_LINK_TRAINING BIT(23) diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 5050f502c1ed..4b651065738a 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -49,19 +49,10 @@ /*-------------------------------------------------------------------------*/ #if IS_ENABLED(CONFIG_USB_ULPI) -struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, - unsigned int flags); - struct usb_phy *devm_otg_ulpi_create(struct device *dev, struct usb_phy_io_ops *ops, unsigned int flags); #else -static inline struct usb_phy *otg_ulpi_create(struct usb_phy_io_ops *ops, - unsigned int flags) -{ - return NULL; -} - static inline struct usb_phy *devm_otg_ulpi_create(struct device *dev, struct usb_phy_io_ops *ops, unsigned int flags) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0b9f1e598e3a..4bc6bb01a0eb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -76,6 +76,7 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 +# define EVENT_LINK_CARRIER_ON 14 /* This one is special, as it indicates that the device is going away * there are cyclic dependencies between tasklet, timer and bh * that must be broken diff --git a/include/linux/usb/xhci-sideband.h b/include/linux/usb/xhci-sideband.h new file mode 100644 index 000000000000..45288c392f6e --- /dev/null +++ b/include/linux/usb/xhci-sideband.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * xHCI host controller sideband support + * + * Copyright (c) 2023-2025, Intel Corporation. + * + * Author: Mathias Nyman <mathias.nyman@linux.intel.com> + */ +#ifndef __LINUX_XHCI_SIDEBAND_H +#define __LINUX_XHCI_SIDEBAND_H + +#include <linux/scatterlist.h> +#include <linux/usb.h> + +#define EP_CTX_PER_DEV 31 /* FIXME defined twice, from xhci.h */ + +struct xhci_sideband; + +enum xhci_sideband_type { + XHCI_SIDEBAND_AUDIO, + XHCI_SIDEBAND_VENDOR, +}; + +enum xhci_sideband_notify_type { + XHCI_SIDEBAND_XFER_RING_FREE, +}; + +/** + * struct xhci_sideband_event - sideband event + * @type: notifier type + * @evt_data: event data + */ +struct xhci_sideband_event { + enum xhci_sideband_notify_type type; + void *evt_data; +}; + +/** + * struct xhci_sideband - representation of a sideband accessed usb device. + * @xhci: The xhci host controller the usb device is connected to + * @vdev: the usb device accessed via sideband + * @eps: array of endpoints controlled via sideband + * @ir: event handling and buffer for sideband accessed device + * @type: xHCI sideband type + * @mutex: mutex for sideband operations + * @intf: USB sideband client interface + * @notify_client: callback for xHCI sideband sequences + * + * FIXME usb device accessed via sideband Keeping track of sideband accessed usb devices. + */ +struct xhci_sideband { + struct xhci_hcd *xhci; + struct xhci_virt_device *vdev; + struct xhci_virt_ep *eps[EP_CTX_PER_DEV]; + struct xhci_interrupter *ir; + enum xhci_sideband_type type; + + /* Synchronizing xHCI sideband operations with client drivers operations */ + struct mutex mutex; + + struct usb_interface *intf; + int (*notify_client)(struct usb_interface *intf, + struct xhci_sideband_event *evt); +}; + +struct xhci_sideband * +xhci_sideband_register(struct usb_interface *intf, enum xhci_sideband_type type, + int (*notify_client)(struct usb_interface *intf, + struct xhci_sideband_event *evt)); +void +xhci_sideband_unregister(struct xhci_sideband *sb); +int +xhci_sideband_add_endpoint(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +int +xhci_sideband_remove_endpoint(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +int +xhci_sideband_stop_endpoint(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +struct sg_table * +xhci_sideband_get_endpoint_buffer(struct xhci_sideband *sb, + struct usb_host_endpoint *host_ep); +struct sg_table * +xhci_sideband_get_event_buffer(struct xhci_sideband *sb); +int +xhci_sideband_create_interrupter(struct xhci_sideband *sb, int num_seg, + bool ip_autoclear, u32 imod_interval, int intr_num); +void +xhci_sideband_remove_interrupter(struct xhci_sideband *sb); +int +xhci_sideband_interrupter_id(struct xhci_sideband *sb); + +#if IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) +void xhci_sideband_notify_ep_ring_free(struct xhci_sideband *sb, + unsigned int ep_index); +#else +static inline void xhci_sideband_notify_ep_ring_free(struct xhci_sideband *sb, + unsigned int ep_index) +{ } +#endif /* IS_ENABLED(CONFIG_USB_XHCI_SIDEBAND) */ +#endif /* __LINUX_XHCI_SIDEBAND_H */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 7183e5aca282..a0bb6d012137 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -5,8 +5,10 @@ #include <linux/kref.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> +#include <linux/rculist_nulls.h> #include <linux/sched.h> #include <linux/workqueue.h> +#include <linux/rcuref.h> #include <linux/rwsem.h> #include <linux/sysctl.h> #include <linux/err.h> @@ -115,10 +117,11 @@ struct user_namespace { } __randomize_layout; struct ucounts { - struct hlist_node node; + struct hlist_nulls_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + struct rcu_head rcu; + rcuref_t count; atomic_long_t ucount[UCOUNT_COUNTS]; atomic_long_t rlimit[UCOUNT_RLIMIT_COUNTS]; }; @@ -131,9 +134,15 @@ void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); -struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); +static inline struct ucounts * __must_check get_ucounts(struct ucounts *ucounts) +{ + if (rcuref_get(&ucounts->count)) + return ucounts; + return NULL; +} + static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type type) { return atomic_long_read(&ucounts->rlimit[type]); diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 825487fb66fa..9373962aade9 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -2,7 +2,25 @@ #ifndef _LINUX_HELPER_MACROS_H_ #define _LINUX_HELPER_MACROS_H_ +#include <linux/compiler_attributes.h> #include <linux/math.h> +#include <linux/typecheck.h> +#include <linux/stddef.h> + +/** + * for_each_if - helper for handling conditionals in various for_each macros + * @condition: The condition to check + * + * Typical use:: + * + * #define for_each_foo_bar(x, y) \' + * list_for_each_entry(x, y->list, head) \' + * for_each_if(x->something == SOMETHING) + * + * The for_each_if() macro makes the use of for_each_foo_bar() less error + * prone. + */ +#define for_each_if(condition) if (!(condition)) {} else /** * find_closest - locate the closest element in a sorted array @@ -65,6 +83,72 @@ }) /** + * PTR_IF - evaluate to @ptr if @cond is true, or to NULL otherwise. + * @cond: A conditional, usually in a form of IS_ENABLED(CONFIG_FOO) + * @ptr: A pointer to assign if @cond is true. + * + * PTR_IF(IS_ENABLED(CONFIG_FOO), ptr) evaluates to @ptr if CONFIG_FOO is set + * to 'y' or 'm', or to NULL otherwise. The @ptr argument must be a pointer. + * + * The macro can be very useful to help compiler dropping dead code. + * + * For instance, consider the following:: + * + * #ifdef CONFIG_FOO_SUSPEND + * static int foo_suspend(struct device *dev) + * { + * ... + * } + * #endif + * + * static struct pm_ops foo_ops = { + * #ifdef CONFIG_FOO_SUSPEND + * .suspend = foo_suspend, + * #endif + * }; + * + * While this works, the foo_suspend() macro is compiled conditionally, + * only when CONFIG_FOO_SUSPEND is set. This is problematic, as there could + * be a build bug in this function, we wouldn't have a way to know unless + * the configuration option is set. + * + * An alternative is to declare foo_suspend() always, but mark it + * as __maybe_unused. This works, but the __maybe_unused attribute + * is required to instruct the compiler that the function may not + * be referenced anywhere, and is safe to remove without making + * a fuss about it. This makes the programmer responsible for tagging + * the functions that can be garbage-collected. + * + * With the macro it is possible to write the following: + * + * static int foo_suspend(struct device *dev) + * { + * ... + * } + * + * static struct pm_ops foo_ops = { + * .suspend = PTR_IF(IS_ENABLED(CONFIG_FOO_SUSPEND), foo_suspend), + * }; + * + * The foo_suspend() function will now be automatically dropped by the + * compiler, and it does not require any specific attribute. + */ +#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL) + +/** + * to_user_ptr - cast a pointer passed as u64 from user space to void __user * + * @x: The u64 value from user space, usually via IOCTL + * + * to_user_ptr() simply casts a pointer passed as u64 from user space to void + * __user * correctly. Using this lets us get rid of all the tiresome casts. + */ +#define u64_to_user_ptr(x) \ +({ \ + typecheck(u64, (x)); \ + (void __user *)(uintptr_t)(x); \ +}) + +/** * is_insidevar - check if the @ptr points inside the @var memory range. * @ptr: the pointer to a memory address. * @var: the variable which address and size identify the memory range. diff --git a/include/linux/vdso_datastore.h b/include/linux/vdso_datastore.h new file mode 100644 index 000000000000..a91fa24b06e0 --- /dev/null +++ b/include/linux/vdso_datastore.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VDSO_DATASTORE_H +#define _LINUX_VDSO_DATASTORE_H + +#include <linux/mm_types.h> + +extern const struct vm_special_mapping vdso_vvar_mapping; +struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr); + +#endif /* _LINUX_VDSO_DATASTORE_H */ diff --git a/include/linux/verification.h b/include/linux/verification.h index cb2d47f28091..4f3022d081c3 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -38,8 +38,6 @@ enum key_being_used_for { VERIFYING_UNSPECIFIED_SIGNATURE, NR__KEY_BEING_USED_FOR }; -extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR]; - #ifdef CONFIG_SYSTEM_DATA_VERIFICATION struct key; diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 939ceabcaf06..335c360d4f9b 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -33,7 +33,6 @@ #define MODULE_VERMAGIC_MODVERSIONS "" #endif #ifdef RANDSTRUCT -#include <generated/randstruct_hash.h> #define MODULE_RANDSTRUCT "RANDSTRUCT_" RANDSTRUCT_HASHED_SEED #else #define MODULE_RANDSTRUCT diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 000a6cab2d31..eb563f538dee 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -67,6 +67,7 @@ struct vfio_device { struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; + struct ida pasids; u8 iommufd_attached:1; #endif u8 cdev_opened:1; @@ -91,6 +92,8 @@ struct vfio_device { * bound iommufd. Undo in unbind_iommufd if @detach_ioas is not * called. * @detach_ioas: Opposite of attach_ioas + * @pasid_attach_ioas: The pasid variation of attach_ioas + * @pasid_detach_ioas: Opposite of pasid_attach_ioas * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor @@ -102,6 +105,9 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @match_token_uuid: Optional device token match/validation. Return 0 + * if the uuid is valid for the device, -errno otherwise. uuid is NULL + * if none was provided. * @dma_unmap: Called when userspace unmaps IOVA from the container * this device is attached to. * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl @@ -115,6 +121,9 @@ struct vfio_device_ops { void (*unbind_iommufd)(struct vfio_device *vdev); int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id); void (*detach_ioas)(struct vfio_device *vdev); + int (*pasid_attach_ioas)(struct vfio_device *vdev, u32 pasid, + u32 *pt_id); + void (*pasid_detach_ioas)(struct vfio_device *vdev, u32 pasid); int (*open_device)(struct vfio_device *vdev); void (*close_device)(struct vfio_device *vdev); ssize_t (*read)(struct vfio_device *vdev, char __user *buf, @@ -126,6 +135,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + int (*match_token_uuid)(struct vfio_device *vdev, const uuid_t *uuid); void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); @@ -139,6 +149,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev, void vfio_iommufd_physical_unbind(struct vfio_device *vdev); int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id); void vfio_iommufd_physical_detach_ioas(struct vfio_device *vdev); +int vfio_iommufd_physical_pasid_attach_ioas(struct vfio_device *vdev, + u32 pasid, u32 *pt_id); +void vfio_iommufd_physical_pasid_detach_ioas(struct vfio_device *vdev, + u32 pasid); int vfio_iommufd_emulated_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx, u32 *out_device_id); void vfio_iommufd_emulated_unbind(struct vfio_device *vdev); @@ -166,6 +180,10 @@ vfio_iommufd_get_dev_id(struct vfio_device *vdev, struct iommufd_ctx *ictx) ((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL) #define vfio_iommufd_physical_detach_ioas \ ((void (*)(struct vfio_device *vdev)) NULL) +#define vfio_iommufd_physical_pasid_attach_ioas \ + ((int (*)(struct vfio_device *vdev, u32 pasid, u32 *pt_id)) NULL) +#define vfio_iommufd_physical_pasid_detach_ioas \ + ((void (*)(struct vfio_device *vdev, u32 pasid)) NULL) #define vfio_iommufd_emulated_bind \ ((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx, \ u32 *out_device_id)) NULL) diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index fbb472dd99b3..f541044e42a2 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -122,6 +122,8 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma); void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count); int vfio_pci_core_match(struct vfio_device *core_vdev, char *buf); +int vfio_pci_core_match_token_uuid(struct vfio_device *core_vdev, + const uuid_t *uuid); int vfio_pci_core_enable(struct vfio_pci_core_device *vdev); void vfio_pci_core_disable(struct vfio_pci_core_device *vdev); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev); diff --git a/include/linux/vfsdebug.h b/include/linux/vfsdebug.h new file mode 100644 index 000000000000..9cf22d3eb9dd --- /dev/null +++ b/include/linux/vfsdebug.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_VFS_DEBUG_H +#define LINUX_VFS_DEBUG_H 1 + +#include <linux/bug.h> + +struct inode; + +#ifdef CONFIG_DEBUG_VFS +void dump_inode(struct inode *inode, const char *reason); + +#define VFS_BUG_ON(cond) BUG_ON(cond) +#define VFS_WARN_ON(cond) (void)WARN_ON(cond) +#define VFS_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) +#define VFS_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) +#define VFS_WARN(cond, format...) (void)WARN(cond, format) + +#define VFS_BUG_ON_INODE(cond, inode) ({ \ + if (unlikely(!!(cond))) { \ + dump_inode(inode, "VFS_BUG_ON_INODE(" #cond")");\ + BUG_ON(1); \ + } \ +}) + +#define VFS_WARN_ON_INODE(cond, inode) ({ \ + int __ret_warn = !!(cond); \ + \ + if (unlikely(__ret_warn)) { \ + dump_inode(inode, "VFS_WARN_ON_INODE(" #cond")");\ + WARN_ON(1); \ + } \ + unlikely(__ret_warn); \ +}) +#else +#define VFS_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) +#define VFS_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) + +#define VFS_BUG_ON_INODE(cond, inode) VFS_BUG_ON(cond) +#define VFS_WARN_ON_INODE(cond, inode) BUILD_BUG_ON_INVALID(cond) +#endif /* CONFIG_DEBUG_VFS */ + +#endif diff --git a/include/linux/virtio.h b/include/linux/virtio.h index dd88682e27e3..64cb4b04be7a 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -190,6 +190,8 @@ int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); #endif void virtio_reset_device(struct virtio_device *dev); +int virtio_device_reset_prepare(struct virtio_device *dev); +int virtio_device_reset_done(struct virtio_device *dev); size_t virtio_max_dma_size(const struct virtio_device *vdev); @@ -214,6 +216,12 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); * changes; may be called in interrupt context. * @freeze: optional function to call during suspend/hibernation. * @restore: optional function to call on resume. + * @reset_prepare: optional function to call when a transport specific reset + * occurs. + * @reset_done: optional function to call after transport specific reset + * operation has finished. + * @shutdown: synchronize with the device on shutdown. If provided, replaces + * the virtio core implementation. */ struct virtio_driver { struct device_driver driver; @@ -229,6 +237,9 @@ struct virtio_driver { void (*config_changed)(struct virtio_device *dev); int (*freeze)(struct virtio_device *dev); int (*restore)(struct virtio_device *dev); + int (*reset_prepare)(struct virtio_device *dev); + int (*reset_done)(struct virtio_device *dev); + void (*shutdown)(struct virtio_device *dev); }; #define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver) diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 169c7d367fac..b3e1d30c765b 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -329,6 +329,8 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { + if (!region->len) + return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0387d64e2c66..6c00687539cf 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -111,7 +111,12 @@ static inline size_t virtio_vsock_skb_len(struct sk_buff *skb) return (size_t)(skb_end_pointer(skb) - skb->head); } -#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4) +/* Dimension the RX SKB so that the entire thing fits exactly into + * a single 4KiB page. This avoids wasting memory due to alloc_skb() + * rounding up to the next page order and also means that we + * don't leave higher-order pages sitting around in the RX queue. + */ +#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE SKB_WITH_OVERHEAD(1024 * 4) #define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL #define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64) @@ -140,6 +145,7 @@ struct virtio_vsock_sock { u32 last_fwd_cnt; u32 rx_bytes; u32 buf_alloc; + u32 buf_used; struct sk_buff_head rx_queue; u32 msg_count; }; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index f70d0958095c..9e15a088ba38 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,9 +41,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, PGSCAN_DIRECT_THROTTLE, PGSCAN_ANON, PGSCAN_FILE, @@ -151,6 +153,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_X86 DIRECT_MAP_LEVEL2_SPLIT, DIRECT_MAP_LEVEL3_SPLIT, + DIRECT_MAP_LEVEL2_COLLAPSE, + DIRECT_MAP_LEVEL3_COLLAPSE, #endif #ifdef CONFIG_PER_VMA_LOCK_STATS VMA_LOCK_SUCCESS, diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 31e9ffd936e3..fdc9aeb74a44 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -61,6 +61,7 @@ struct vm_struct { unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; + unsigned long requested_size; }; struct vmap_area { @@ -113,6 +114,14 @@ static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, uns } #endif +#ifndef arch_vmap_pte_range_unmap_size +static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr, + pte_t *ptep) +{ + return PAGE_SIZE; +} +#endif + #ifndef arch_vmap_pte_supported_shift static inline int arch_vmap_pte_supported_shift(unsigned long size) { @@ -168,8 +177,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m int node, const void *caller) __alloc_size(1); #define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) -void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); -#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__)) +void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1); +#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__)) + +static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) +{ + return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE); +} extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h index e1dec1a6a749..37e003ae5262 100644 --- a/include/linux/vmcore_info.h +++ b/include/linux/vmcore_info.h @@ -6,9 +6,8 @@ #include <linux/elfcore.h> #include <linux/elf.h> -#define CRASH_CORE_NOTE_NAME "CORE" #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(NN_PRSTATUS), 4) #define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) /* diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9f3a04345b86..b2ccb6845595 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -10,15 +10,8 @@ #include <linux/static_key.h> #include <linux/mmdebug.h> -extern int sysctl_stat_interval; - #ifdef CONFIG_NUMA -#define ENABLE_NUMA_STAT 1 -#define DISABLE_NUMA_STAT 0 -extern int sysctl_vm_numa_stat; DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); -int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos); #endif struct reclaim_stat { @@ -304,10 +297,6 @@ void quiet_vmstat(void); void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); -struct ctl_table; -int vmstat_refresh(const struct ctl_table *, int write, void *buffer, size_t *lenp, - loff_t *ppos); - void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); int calculate_pressure_threshold(struct zone *zone); @@ -515,7 +504,7 @@ static inline const char *node_stat_name(enum node_stat_item item) static inline const char *lru_list_name(enum lru_list lru) { - return node_stat_name(NR_LRU_BASE + (enum node_stat_item)lru) + 3; // skip "nr_" + return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h index 6fb663b36f72..60c9eacd2cf3 100644 --- a/include/linux/vmw_vmci_defs.h +++ b/include/linux/vmw_vmci_defs.h @@ -431,11 +431,11 @@ enum { ((((_p)[0] & 0xFF) << 24) | (((_p)[1] & 0xFF) << 16) | ((_p)[2])) /* - * The VMCI IOCTLs. We use identity code 7, as noted in ioctl-number.h, and - * we start at sequence 9f. This gives us the same values that our shipping - * products use, starting at 1951, provided we leave out the direction and - * structure size. Note that VMMon occupies the block following us, starting - * at 2001. + * The VMCI IOCTLs. We use identity code 7, as noted in ioctl-number.rst, + * and we start at sequence 9f. This gives us the same values that our + * shipping products use, starting at 1951, provided we leave out the + * direction and structure size. Note that VMMon occupies the block + * following us, starting at 2001. */ #define IOCTL_VMCI_VERSION _IO(7, 0x9f) /* 1951 */ #define IOCTL_VMCI_INIT_CONTEXT _IO(7, 0xa0) @@ -453,9 +453,7 @@ enum { #define IOCTL_VMCI_CTX_GET_CPT_STATE _IO(7, 0xb1) #define IOCTL_VMCI_CTX_SET_CPT_STATE _IO(7, 0xb2) #define IOCTL_VMCI_GET_CONTEXT_ID _IO(7, 0xb3) -#define IOCTL_VMCI_SOCKETS_VERSION _IO(7, 0xb4) -#define IOCTL_VMCI_SOCKETS_GET_AF_VALUE _IO(7, 0xb8) -#define IOCTL_VMCI_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9) +/*IOCTL_VM_SOCKETS_GET_LOCAL_CID _IO(7, 0xb9)*/ #define IOCTL_VMCI_SET_NOTIFY _IO(7, 0xcb) /* 1995 */ /*IOCTL_VMMON_START _IO(7, 0xd1)*/ /* 2001 */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 6d90ad974408..965a19809c7e 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -316,6 +316,9 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags); } \ \ cmd; \ + \ + if (condition) \ + break; \ } \ finish_wait(&wq_head, &__wq_entry); \ __out: __ret; \ @@ -1207,14 +1210,16 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) -#define init_wait(wait) \ +#define init_wait_func(wait, function) \ do { \ (wait)->private = current; \ - (wait)->func = autoremove_wake_function; \ + (wait)->func = function; \ INIT_LIST_HEAD(&(wait)->entry); \ (wait)->flags = 0; \ } while (0) +#define init_wait(wait) init_wait_func(wait, autoremove_wake_function) + typedef int (*task_call_f)(struct task_struct *p, void *arg); extern int task_call_func(struct task_struct *p, task_call_f func, void *arg); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b0dc957c3e56..6e30f275da77 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -316,7 +316,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ - __init_timer(&(_work)->timer, \ + __timer_init(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) @@ -324,7 +324,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ - __init_timer_on_stack(&(_work)->timer, \ + __timer_init_on_stack(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) @@ -480,7 +480,7 @@ void workqueue_softirq_dead(unsigned int cpu); * executing at most one work item for the workqueue. * * For unbound workqueues, @max_active limits the number of in-flight work items - * for the whole system. e.g. @max_active of 16 indicates that that there can be + * for the whole system. e.g. @max_active of 16 indicates that there can be * at most 16 work items executing for the workqueue in the whole system. * * As sharing the same active counter for an unbound workqueue across multiple diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d11b903c2edb..eda4b62511f7 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -313,6 +313,30 @@ static inline void cgroup_writeback_umount(struct super_block *sb) /* * mm/page-writeback.c */ +/* consolidated parameters for balance_dirty_pages() and its subroutines */ +struct dirty_throttle_control { +#ifdef CONFIG_CGROUP_WRITEBACK + struct wb_domain *dom; + struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */ +#endif + struct bdi_writeback *wb; + struct fprop_local_percpu *wb_completions; + + unsigned long avail; /* dirtyable */ + unsigned long dirty; /* file_dirty + write + nfs */ + unsigned long thresh; /* dirty threshold */ + unsigned long bg_thresh; /* dirty background threshold */ + unsigned long limit; /* hard dirty limit */ + + unsigned long wb_dirty; /* per-wb counterparts */ + unsigned long wb_thresh; + unsigned long wb_bg_thresh; + + unsigned long pos_ratio; + bool freerun; + bool dirty_exceeded; +}; + void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_timer_fn(struct timer_list *t); @@ -327,12 +351,8 @@ extern struct wb_domain global_wb_domain; /* These are exported to sysctl. */ extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; -extern unsigned int dirtytime_expire_interval; extern int laptop_mode; -int dirtytime_interval_handler(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); - void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); unsigned long cgwb_calc_thresh(struct bdi_writeback *wb); diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 79c781875c09..a4d6cc0c9f68 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -97,7 +97,7 @@ struct wwan_port_caps { * * This function must be balanced with a call to wwan_remove_port(). * - * Returns a valid pointer to wwan_port on success or PTR_ERR on failure + * Returns: a valid pointer to wwan_port on success or PTR_ERR on failure */ struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 0b618ec04115..be850174e802 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -965,10 +965,12 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id, * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * + * Note that callers interested in whether wrapping has occurred should + * use __xa_alloc_cyclic() instead. + * * Context: Any context. Takes and releases the xa_lock. May sleep if * the @gfp flags permit. - * Return: 0 if the allocation succeeded without wrapping. 1 if the - * allocation succeeded after wrapping, -ENOMEM if memory could not be + * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, @@ -981,7 +983,7 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock(xa); - return err; + return err < 0 ? err : 0; } /** @@ -1002,10 +1004,12 @@ static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry, * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * + * Note that callers interested in whether wrapping has occurred should + * use __xa_alloc_cyclic() instead. + * * Context: Any context. Takes and releases the xa_lock while * disabling softirqs. May sleep if the @gfp flags permit. - * Return: 0 if the allocation succeeded without wrapping. 1 if the - * allocation succeeded after wrapping, -ENOMEM if memory could not be + * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, @@ -1018,7 +1022,7 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_bh(xa); - return err; + return err < 0 ? err : 0; } /** @@ -1039,10 +1043,12 @@ static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry, * Must only be operated on an xarray initialized with flag XA_FLAGS_ALLOC set * in xa_init_flags(). * + * Note that callers interested in whether wrapping has occurred should + * use __xa_alloc_cyclic() instead. + * * Context: Process context. Takes and releases the xa_lock while * disabling interrupts. May sleep if the @gfp flags permit. - * Return: 0 if the allocation succeeded without wrapping. 1 if the - * allocation succeeded after wrapping, -ENOMEM if memory could not be + * Return: 0 if the allocation succeeded, -ENOMEM if memory could not be * allocated or -EBUSY if there are no free entries in @limit. */ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, @@ -1055,7 +1061,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry, err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp); xa_unlock_irq(xa); - return err; + return err < 0 ? err : 0; } /** @@ -1555,6 +1561,8 @@ int xa_get_order(struct xarray *, unsigned long index); int xas_get_order(struct xa_state *xas); void xas_split(struct xa_state *, void *entry, unsigned int order); void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); +void xas_try_split(struct xa_state *xas, void *entry, unsigned int order); +unsigned int xas_try_split_min_order(unsigned int order); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { @@ -1576,6 +1584,17 @@ static inline void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, gfp_t gfp) { } + +static inline void xas_try_split(struct xa_state *xas, void *entry, + unsigned int order) +{ +} + +static inline unsigned int xas_try_split_min_order(unsigned int order) +{ + return 0; +} + #endif /** diff --git a/include/linux/zpool.h b/include/linux/zpool.h index a67d62b79698..369ef068fad8 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -4,9 +4,8 @@ * * Copyright (C) 2014 Dan Streetman * - * This is a common frontend for the zbud and zsmalloc memory - * storage pool implementations. Typically, this is used to - * store compressed memory. + * This is a common frontend for the zswap compressed memory storage + * implementations. */ #ifndef _ZPOOL_H_ @@ -14,25 +13,6 @@ struct zpool; -/* - * Control how a handle is mapped. It will be ignored if the - * implementation does not support it. Its use is optional. - * Note that this does not refer to memory protection, it - * refers to how the memory will be copied in/out if copying - * is necessary during mapping; read-write is the safest as - * it copies the existing memory in on map, and copies the - * changed memory back out on unmap. Write-only does not copy - * in the memory and should only be used for initialization. - * If in doubt, use ZPOOL_MM_DEFAULT which is read-write. - */ -enum zpool_mapmode { - ZPOOL_MM_RW, /* normal read-write mapping */ - ZPOOL_MM_RO, /* read-only (no copy-out at unmap time) */ - ZPOOL_MM_WO, /* write-only (no copy-in at map time) */ - - ZPOOL_MM_DEFAULT = ZPOOL_MM_RW -}; - bool zpool_has_pool(char *type); struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp); @@ -41,17 +21,19 @@ const char *zpool_get_type(struct zpool *pool); void zpool_destroy_pool(struct zpool *pool); -bool zpool_malloc_support_movable(struct zpool *pool); - int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, - unsigned long *handle); + unsigned long *handle, const int nid); void zpool_free(struct zpool *pool, unsigned long handle); -void *zpool_map_handle(struct zpool *pool, unsigned long handle, - enum zpool_mapmode mm); +void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, + void *local_copy); + +void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, + void *handle_mem); -void zpool_unmap_handle(struct zpool *pool, unsigned long handle); +void zpool_obj_write(struct zpool *zpool, unsigned long handle, + void *handle_mem, size_t mem_len); u64 zpool_get_total_pages(struct zpool *pool); @@ -81,15 +63,16 @@ struct zpool_driver { void *(*create)(const char *name, gfp_t gfp); void (*destroy)(void *pool); - bool malloc_support_movable; int (*malloc)(void *pool, size_t size, gfp_t gfp, - unsigned long *handle); + unsigned long *handle, const int nid); void (*free)(void *pool, unsigned long handle); - bool sleep_mapped; - void *(*map)(void *pool, unsigned long handle, - enum zpool_mapmode mm); - void (*unmap)(void *pool, unsigned long handle); + void *(*obj_read_begin)(void *pool, unsigned long handle, + void *local_copy); + void (*obj_read_end)(void *pool, unsigned long handle, + void *handle_mem); + void (*obj_write)(void *pool, unsigned long handle, + void *handle_mem, size_t mem_len); u64 (*total_pages)(void *pool); }; diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index a48cd0ffe57d..13e9cc5490f7 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -16,23 +16,6 @@ #include <linux/types.h> -/* - * zsmalloc mapping modes - * - * NOTE: These only make a difference when a mapped object spans pages. - */ -enum zs_mapmode { - ZS_MM_RW, /* normal read-write mapping */ - ZS_MM_RO, /* read-only (no copy-out at unmap time) */ - ZS_MM_WO /* write-only (no copy-in at map time) */ - /* - * NOTE: ZS_MM_WO should only be used for initializing new - * (uninitialized) allocations. Partial writes to already - * initialized allocations should use ZS_MM_RW to preserve the - * existing data. - */ -}; - struct zs_pool_stats { /* How many pages were migrated (freed) */ atomic_long_t pages_compacted; @@ -43,19 +26,24 @@ struct zs_pool; struct zs_pool *zs_create_pool(const char *name); void zs_destroy_pool(struct zs_pool *pool); -unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags); +unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags, + const int nid); void zs_free(struct zs_pool *pool, unsigned long obj); size_t zs_huge_class_size(struct zs_pool *pool); -void *zs_map_object(struct zs_pool *pool, unsigned long handle, - enum zs_mapmode mm); -void zs_unmap_object(struct zs_pool *pool, unsigned long handle); - unsigned long zs_get_total_pages(struct zs_pool *pool); unsigned long zs_compact(struct zs_pool *pool); unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size); void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats); + +void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle, + void *local_copy); +void zs_obj_read_end(struct zs_pool *pool, unsigned long handle, + void *handle_mem); +void zs_obj_write(struct zs_pool *pool, unsigned long handle, + void *handle_mem, size_t mem_len); + #endif diff --git a/include/linux/zstd.h b/include/linux/zstd.h index b2c7cf310c8f..2f2a3c8b8a33 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -160,7 +160,6 @@ typedef ZSTD_parameters zstd_parameters; zstd_parameters zstd_get_params(int level, unsigned long long estimated_src_size); - /** * zstd_get_cparams() - returns zstd_compression_parameters for selected level * @level: The compression level @@ -173,9 +172,20 @@ zstd_parameters zstd_get_params(int level, zstd_compression_parameters zstd_get_cparams(int level, unsigned long long estimated_src_size, size_t dict_size); -/* ====== Single-pass Compression ====== */ - typedef ZSTD_CCtx zstd_cctx; +typedef ZSTD_cParameter zstd_cparameter; + +/** + * zstd_cctx_set_param() - sets a compression parameter + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @param: The parameter to set. + * @value: The value to set the parameter to. + * + * Return: Zero or an error, which can be checked using zstd_is_error(). + */ +size_t zstd_cctx_set_param(zstd_cctx *cctx, zstd_cparameter param, int value); + +/* ====== Single-pass Compression ====== */ /** * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx @@ -191,6 +201,20 @@ typedef ZSTD_CCtx zstd_cctx; size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); /** + * zstd_cctx_workspace_bound_with_ext_seq_prod() - max memory needed to + * initialize a zstd_cctx when using the block-level external sequence + * producer API. + * @parameters: The compression parameters to be used. + * + * If multiple compression parameters might be used, the caller must call + * this function for each set of parameters and use the maximum size. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cctx(). + */ +size_t zstd_cctx_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *parameters); + +/** * zstd_init_cctx() - initialize a zstd compression context * @workspace: The workspace to emplace the context into. It must outlive * the returned context. @@ -425,6 +449,16 @@ typedef ZSTD_CStream zstd_cstream; size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); /** + * zstd_cstream_workspace_bound_with_ext_seq_prod() - memory needed to initialize + * a zstd_cstream when using the block-level external sequence producer API. + * @cparams: The compression parameters to be used for compression. + * + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cstream(). + */ +size_t zstd_cstream_workspace_bound_with_ext_seq_prod(const zstd_compression_parameters *cparams); + +/** * zstd_init_cstream() - initialize a zstd streaming compression context * @parameters The zstd parameters to use for compression. * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller @@ -584,6 +618,18 @@ size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); /** + * zstd_register_sequence_producer() - exposes the zstd library function + * ZSTD_registerSequenceProducer(). This is used for the block-level external + * sequence producer API. See upstream zstd.h for detailed documentation. + */ +typedef ZSTD_sequenceProducer_F zstd_sequence_producer_f; +void zstd_register_sequence_producer( + zstd_cctx *cctx, + void* sequence_producer_state, + zstd_sequence_producer_f sequence_producer +); + +/** * struct zstd_frame_params - zstd frame parameters stored in the frame header * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not * present. @@ -596,7 +642,7 @@ size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); * * See zstd_lib.h. */ -typedef ZSTD_frameHeader zstd_frame_header; +typedef ZSTD_FrameHeader zstd_frame_header; /** * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame @@ -611,4 +657,35 @@ typedef ZSTD_frameHeader zstd_frame_header; size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, size_t src_size); +/** + * struct zstd_sequence - a sequence of literals or a match + * + * @offset: The offset of the match + * @litLength: The literal length of the sequence + * @matchLength: The match length of the sequence + * @rep: Represents which repeat offset is used + */ +typedef ZSTD_Sequence zstd_sequence; + +/** + * zstd_compress_sequences_and_literals() - compress an array of zstd_sequence and literals + * + * @cctx: The zstd compression context. + * @dst: The buffer to compress the data into. + * @dst_capacity: The size of the destination buffer. + * @in_seqs: The array of zstd_sequence to compress. + * @in_seqs_size: The number of sequences in in_seqs. + * @literals: The literals associated to the sequences to be compressed. + * @lit_size: The size of the literals in the literals buffer. + * @lit_capacity: The size of the literals buffer. + * @decompressed_size: The size of the input data + * + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). + */ +size_t zstd_compress_sequences_and_literals(zstd_cctx *cctx, void* dst, size_t dst_capacity, + const zstd_sequence *in_seqs, size_t in_seqs_size, + const void* literals, size_t lit_size, size_t lit_capacity, + size_t decompressed_size); + #endif /* LINUX_ZSTD_H */ diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h index 58b6dd45a969..c307fb011132 100644 --- a/include/linux/zstd_errors.h +++ b/include/linux/zstd_errors.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -12,13 +13,18 @@ #define ZSTD_ERRORS_H_398273423 -/*===== dependency =====*/ -#include <linux/types.h> /* size_t */ +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#define ZSTDERRORLIB_VISIBLE +#ifndef ZSTDERRORLIB_HIDDEN +# if (__GNUC__ >= 4) && !defined(__MINGW32__) +# define ZSTDERRORLIB_HIDDEN __attribute__ ((visibility ("hidden"))) +# else +# define ZSTDERRORLIB_HIDDEN +# endif +#endif -/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ -#define ZSTDERRORLIB_VISIBILITY -#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBLE /*-********************************************* * Error codes list @@ -43,14 +49,18 @@ typedef enum { ZSTD_error_frameParameter_windowTooLarge = 16, ZSTD_error_corruption_detected = 20, ZSTD_error_checksum_wrong = 22, + ZSTD_error_literals_headerWrong = 24, ZSTD_error_dictionary_corrupted = 30, ZSTD_error_dictionary_wrong = 32, ZSTD_error_dictionaryCreation_failed = 34, ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_combination_unsupported = 41, ZSTD_error_parameter_outOfBound = 42, ZSTD_error_tableLog_tooLarge = 44, ZSTD_error_maxSymbolValue_tooLarge = 46, ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_cannotProduce_uncompressedBlock = 49, + ZSTD_error_stabilityCondition_notRespected = 50, ZSTD_error_stage_wrong = 60, ZSTD_error_init_missing = 62, ZSTD_error_memory_allocation = 64, @@ -58,18 +68,18 @@ typedef enum { ZSTD_error_dstSize_tooSmall = 70, ZSTD_error_srcSize_wrong = 72, ZSTD_error_dstBuffer_null = 74, + ZSTD_error_noForwardProgress_destFull = 80, + ZSTD_error_noForwardProgress_inputEmpty = 82, /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_sequenceProducer_failed = 106, + ZSTD_error_externalSequences_invalid = 107, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; -/*! ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare with enum list published above */ -ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h index 79d55465d5c1..e295d4125dde 100644 --- a/include/linux/zstd_lib.h +++ b/include/linux/zstd_lib.h @@ -1,5 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -11,23 +12,47 @@ #ifndef ZSTD_H_235446 #define ZSTD_H_235446 -/* ====== Dependency ======*/ -#include <linux/limits.h> /* INT_MAX */ + +/* ====== Dependencies ======*/ #include <linux/types.h> /* size_t */ +#include <linux/zstd_errors.h> /* list of errors */ +#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#include <linux/limits.h> /* INT_MAX */ +#endif /* ZSTD_STATIC_LINKING_ONLY */ + /* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDLIB_VISIBLE +#define ZSTDLIB_VISIBLE + +#ifndef ZSTDLIB_HIDDEN # if (__GNUC__ >= 4) && !defined(__MINGW32__) -# define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default"))) # define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden"))) # else -# define ZSTDLIB_VISIBLE # define ZSTDLIB_HIDDEN # endif #endif + #define ZSTDLIB_API ZSTDLIB_VISIBLE +/* Deprecation warnings : + * Should these warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. + */ +#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS +# define ZSTD_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) || defined(__IAR_SYSTEMS_ICC__) +# define ZSTD_DEPRECATED(message) __attribute__((deprecated(message))) +# elif (__GNUC__ >= 3) +# define ZSTD_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") +# define ZSTD_DEPRECATED(message) +# endif +#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ + /* ***************************************************************************** Introduction @@ -65,7 +90,7 @@ /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 5 -#define ZSTD_VERSION_RELEASE 2 +#define ZSTD_VERSION_RELEASE 7 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) /*! ZSTD_versionNumber() : @@ -103,11 +128,12 @@ ZSTDLIB_API const char* ZSTD_versionString(void); /* ************************************* -* Simple API +* Simple Core API ***************************************/ /*! ZSTD_compress() : * Compresses `src` content as a single zstd compressed frame into already allocated `dst`. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have + * enough space to successfully compress the data. * @return : compressed size written into `dst` (<= `dstCapacity), * or an error code if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, @@ -115,47 +141,55 @@ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, int compressionLevel); /*! ZSTD_decompress() : - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - * `dstCapacity` is an upper bound of originalSize to regenerate. - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * Multiple compressed frames can be decompressed at once with this method. + * The result will be the concatenation of all decompressed frames, back to back. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * First frame's decompressed size can be extracted using ZSTD_getFrameContentSize(). + * If maximum upper bound isn't known, prefer using streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, const void* src, size_t compressedSize); + +/*====== Decompression helper functions ======*/ + /*! ZSTD_getFrameContentSize() : requires v1.3.0+ - * `src` should point to the start of a ZSTD encoded frame. - * `srcSize` must be at least as large as the frame header. - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of `src` frame content, if known - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - * note 1 : a 0 return value means the frame is valid but "empty". - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * Optionally, application can rely on some implicit limit, - * as ZSTD_decompress() only needs an upper bound of decompressed size. - * (For example, data could be necessarily cut into blocks <= 16 KB). - * note 3 : decompressed size is always present when compression is completed using single-pass functions, - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). - * note 4 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure return value fits within application's authorized limits. - * Each application can set its own limits. - * note 6 : This function replaces ZSTD_getDecompressedSize() */ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * When invoking this method on a skippable frame, it will return 0. + * note 2 : decompressed size is an optional field, it may not be present (typically in streaming mode). + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ #define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) #define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -/*! ZSTD_getDecompressedSize() : - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). +/*! ZSTD_getDecompressedSize() (obsolete): + * This function is now obsolete, in favor of ZSTD_getFrameContentSize(). * Both functions work the same way, but ZSTD_getDecompressedSize() blends * "empty", "unknown" and "error" results to the same return value (0), * while ZSTD_getFrameContentSize() gives them separate return values. * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTD_DEPRECATED("Replaced by ZSTD_getFrameContentSize") ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); /*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ @@ -163,18 +197,50 @@ ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t * `srcSize` must be >= first frame size * @return : the compressed size of the first frame starting at `src`, * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ + * or an error code if input is invalid + * Note 1: this method is called _find*() because it's not enough to read the header, + * it may have to scan through the frame's content, to reach its end. + * Note 2: this method also works with Skippable Frames. In which case, + * it returns the size of the complete skippable frame, + * which is always equal to its content size + 8 bytes for headers. */ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); -/*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ -ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ +/*====== Compression helper functions ======*/ + +/*! ZSTD_compressBound() : + * maximum compressed size in worst case single-pass scenario. + * When invoking `ZSTD_compress()`, or any other one-pass compression function, + * it's recommended to provide @dstCapacity >= ZSTD_compressBound(srcSize) + * as it eliminates one potential failure scenario, + * aka not enough room in dst buffer to write the compressed frame. + * Note : ZSTD_compressBound() itself can fail, if @srcSize >= ZSTD_MAX_INPUT_SIZE . + * In which case, ZSTD_compressBound() will return an error code + * which can be tested using ZSTD_isError(). + * + * ZSTD_COMPRESSBOUND() : + * same as ZSTD_compressBound(), but as a macro. + * It can be used to produce constants, which can be useful for static allocation, + * for example to size a static array on stack. + * Will produce constant value 0 if srcSize is too large. + */ +#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U) +#define ZSTD_COMPRESSBOUND(srcSize) (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ + + +/*====== Error helper functions ======*/ +/* ZSTD_isError() : + * Most ZSTD_* functions returning a size_t value can be tested for error, + * using ZSTD_isError(). + * @return 1 if error, 0 otherwise + */ +ZSTDLIB_API unsigned ZSTD_isError(size_t result); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); /* convert a result into an error code, which can be compared to error enum list */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t result); /*!< provides readable string from a function result */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ /* ************************************* @@ -182,25 +248,25 @@ ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compres ***************************************/ /*= Compression context * When compressing many times, - * it is recommended to allocate a context just once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. + * it is recommended to allocate a compression context just once, + * and reuse it for each successive compression operation. + * This will make the workload easier for system's memory. * Note : re-using context is just a speed / resource optimization. * It doesn't change the compression ratio, which remains identical. - * Note 2 : In multi-threaded environments, - * use one different context per thread for parallel execution. + * Note 2: For parallel execution in multi-threaded environments, + * use one different context per thread . */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* compatible with NULL pointer */ /*! ZSTD_compressCCtx() : * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . + * Important : in order to mirror `ZSTD_compress()` behavior, + * this function compresses at the requested compression level, + * __ignoring any other advanced parameter__ . * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. + * they will all be reset. Only @compressionLevel remains. */ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, @@ -210,7 +276,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, /*= Decompression context * When decompressing many times, * it is recommended to allocate a context only once, - * and re-use it for each successive compression operation. + * and reuse it for each successive compression operation. * This will make workload friendlier for system's memory. * Use one context per thread for parallel execution. */ typedef struct ZSTD_DCtx_s ZSTD_DCtx; @@ -220,7 +286,7 @@ ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer * /*! ZSTD_decompressDCtx() : * Same as ZSTD_decompress(), * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. + * Compatible with sticky parameters (see below). */ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -236,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, * using ZSTD_CCtx_set*() functions. * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ . * * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). * * This API supersedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. + * In the future, we expect to remove API entry points from experimental which are redundant with this API. */ @@ -324,6 +390,19 @@ typedef enum { * The higher the value of selected strategy, the more complex it is, * resulting in stronger and slower compression. * Special: value 0 means "use default strategy". */ + + ZSTD_c_targetCBlockSize=130, /* v1.5.6+ + * Attempts to fit compressed block size into approximately targetCBlockSize. + * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX. + * Note that it's not a guarantee, just a convergence target (default:0). + * No target when targetCBlockSize == 0. + * This is helpful in low bandwidth streaming environments to improve end-to-end latency, + * when a client can make use of partial documents (a prominent example being Chrome). + * Note: this parameter is stable since v1.5.6. + * It was present as an experimental parameter in earlier versions, + * but it's not recommended using it with earlier library versions + * due to massive performance regressions. + */ /* LDM mode parameters */ ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. * This parameter is designed to improve compression ratio @@ -403,15 +482,18 @@ typedef enum { * ZSTD_c_forceMaxWindow * ZSTD_c_forceAttachDict * ZSTD_c_literalCompressionMode - * ZSTD_c_targetCBlockSize * ZSTD_c_srcSizeHint * ZSTD_c_enableDedicatedDictSearch * ZSTD_c_stableInBuffer * ZSTD_c_stableOutBuffer * ZSTD_c_blockDelimiters * ZSTD_c_validateSequences - * ZSTD_c_useBlockSplitter + * ZSTD_c_blockSplitterLevel + * ZSTD_c_splitAfterSequences * ZSTD_c_useRowMatchFinder + * ZSTD_c_prefetchCDictTables + * ZSTD_c_enableSeqProducerFallback + * ZSTD_c_maxBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -421,7 +503,7 @@ typedef enum { ZSTD_c_experimentalParam3=1000, ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, - ZSTD_c_experimentalParam6=1003, + /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */ ZSTD_c_experimentalParam7=1004, ZSTD_c_experimentalParam8=1005, ZSTD_c_experimentalParam9=1006, @@ -430,7 +512,12 @@ typedef enum { ZSTD_c_experimentalParam12=1009, ZSTD_c_experimentalParam13=1010, ZSTD_c_experimentalParam14=1011, - ZSTD_c_experimentalParam15=1012 + ZSTD_c_experimentalParam15=1012, + ZSTD_c_experimentalParam16=1013, + ZSTD_c_experimentalParam17=1014, + ZSTD_c_experimentalParam18=1015, + ZSTD_c_experimentalParam19=1016, + ZSTD_c_experimentalParam20=1017 } ZSTD_cParameter; typedef struct { @@ -493,7 +580,7 @@ typedef enum { * They will be used to compress next frame. * Resetting session never fails. * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. + * This also removes any reference to any dictionary or external sequence producer. * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) * - Both : similar to resetting the session, followed by resetting parameters. @@ -502,11 +589,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); /*! ZSTD_compress2() : * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * (note that this entry point doesn't even expose a compression level parameter). * ZSTD_compress2() always starts a new frame. * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() * - The function is always blocking, returns when compression is completed. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * NOTE: Providing `dstCapacity >= ZSTD_compressBound(srcSize)` guarantees that zstd will have + * enough space to successfully compress the data, though it is possible it fails for other reasons. * @return : compressed size written into `dst` (<= `dstCapacity), * or an error code if it fails (which can be tested using ZSTD_isError()). */ @@ -543,13 +632,17 @@ typedef enum { * ZSTD_d_stableOutBuffer * ZSTD_d_forceIgnoreChecksum * ZSTD_d_refMultipleDDicts + * ZSTD_d_disableHuffmanAssembly + * ZSTD_d_maxBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly */ ZSTD_d_experimentalParam1=1000, ZSTD_d_experimentalParam2=1001, ZSTD_d_experimentalParam3=1002, - ZSTD_d_experimentalParam4=1003 + ZSTD_d_experimentalParam4=1003, + ZSTD_d_experimentalParam5=1004, + ZSTD_d_experimentalParam6=1005 } ZSTD_dParameter; @@ -604,14 +697,14 @@ typedef struct ZSTD_outBuffer_s { * A ZSTD_CStream object is required to track streaming operation. * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. * * For parallel execution, use one separate ZSTD_CStream per thread. * * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. * * Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. +* it will reuse the same sticky parameters as previous compression session. * When in doubt, it's recommended to fully initialize the context before usage. * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to @@ -700,6 +793,11 @@ typedef enum { * only ZSTD_e_end or ZSTD_e_flush operations are allowed. * Before starting a new compression job, or changing compression parameters, * it is required to fully flush internal buffers. + * - note: if an operation ends with an error, it may leave @cctx in an undefined state. + * Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state. + * In order to be re-employed after an error, a state must be reset, + * which can be done explicitly (ZSTD_CCtx_reset()), + * or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx()) */ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_outBuffer* output, @@ -728,8 +826,6 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output * This following is a legacy streaming API, available since v1.0+ . * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). * It is redundant, but remains fully supported. - * Streaming in combination with advanced parameters and dictionary compression - * can only be used through the new API. ******************************************************************************/ /*! @@ -738,6 +834,9 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /*< recommended size for output * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * + * Note that ZSTD_initCStream() clears any previously set dictionary. Use the new API + * to compress with a dictionary. */ ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); /*! @@ -758,7 +857,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); * * A ZSTD_DStream object is required to track streaming operations. * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. +* ZSTD_DStream objects can be re-employed multiple times. * * Use ZSTD_initDStream() to start a new decompression operation. * @return : recommended first input size @@ -768,16 +867,21 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); * The function will update both `pos` fields. * If `input.pos < input.size`, some input has not been consumed. * It's up to the caller to present again remaining data. +* * The function tries to flush all data decoded immediately, respecting output buffer size. * If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., +* +* However, when `output.pos == output.size`, it's more difficult to know. +* If @return > 0, the frame is not complete, meaning +* either there is still some data left to flush within internal buffers, +* or there is more input to read to complete the frame (or both). * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. * @return : 0 when a frame is completely decoded and fully flushed, * or an error code, which can be tested using ZSTD_isError(), * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : * the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. +* that will never request more than the remaining content of the compressed frame. * *******************************************************************************/ typedef ZSTD_DCtx ZSTD_DStream; /*< DCtx and DStream are now effectively same object (>= v1.3.0) */ @@ -788,13 +892,38 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer /*===== Streaming decompression functions =====*/ -/* This function is redundant with the advanced API and equivalent to: +/*! ZSTD_initDStream() : + * Initialize/reset DStream state for new decompression operation. + * Call before new decompression operation using same DStream. * + * Note : This function is redundant with the advanced API and equivalent to: * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * ZSTD_DCtx_refDDict(zds, NULL); */ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); +/*! ZSTD_decompressStream() : + * Streaming decompression function. + * Call repetitively to consume full input updating it as necessary. + * Function will update both input and output `pos` fields exposing current state via these fields: + * - `input.pos < input.size`, some input remaining and caller should provide remaining input + * on the next call. + * - `output.pos < output.size`, decoder flushed internal output buffer. + * - `output.pos == output.size`, unflushed data potentially present in the internal buffers, + * check ZSTD_decompressStream() @return value, + * if > 0, invoke it again to flush remaining data to output. + * Note : with no additional input, amount of data flushed <= ZSTD_BLOCKSIZE_MAX. + * + * @return : 0 when a frame is completely decoded and fully flushed, + * or an error code, which can be tested using ZSTD_isError(), + * or any other value > 0, which means there is some decoding or flushing to do to complete current frame. + * + * Note: when an operation returns with an error code, the @zds state may be left in undefined state. + * It's UB to invoke `ZSTD_decompressStream()` on such a state. + * In order to re-use such a state, it must be first reset, + * which can be done explicitly (`ZSTD_DCtx_reset()`), + * or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`) + */ ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ @@ -913,7 +1042,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); * If @return == 0, the dictID could not be decoded. * This could for one of the following reasons : * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden piece of information. * Note : this use case also happens when using a non-conformant dictionary. * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). * - This is not a Zstandard frame. @@ -925,9 +1054,11 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * Advanced dictionary and prefix API (Requires v1.4.0+) * * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and - * only reset with the context is reset with ZSTD_reset_parameters or - * ZSTD_reset_session_and_parameters. Prefixes are single-use. + * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). + * Dictionaries are sticky, they remain valid when same context is reused, + * they only reset when the context is reset + * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters. + * In contrast, Prefixes are single-use. ******************************************************************************/ @@ -937,8 +1068,9 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames, + * until parameters are reset, a new dictionary is loaded, or the dictionary + * is explicitly invalidated by loading a NULL dictionary. * Note 2 : Loading a dictionary involves building tables. * It's also a CPU consuming operation, with non-negligible impact on latency. * Tables are dependent on compression parameters, and for this reason, @@ -947,11 +1079,15 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. * In such a case, dictionary buffer must outlive its users. * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ + * to precisely select how dictionary content must be interpreted. + * Note 5 : This method does not benefit from LDM (long distance mode). + * If you want to employ LDM on some large dictionary content, + * prefer employing ZSTD_CCtx_refPrefix() described below. + */ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); /*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ - * Reference a prepared dictionary, to be used for all next compressed frames. + * Reference a prepared dictionary, to be used for all future compressed frames. * Note that compression parameters are enforced from within CDict, * and supersede any compression parameter previously set within CCtx. * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. @@ -970,6 +1106,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); * Decompression will need same prefix to properly regenerate data. * Compressing with a prefix is similar in outcome as performing a diff and compressing it, * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * This method is compatible with LDM (long distance mode). * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary * Note 1 : Prefix buffer is referenced. It **must** outlive compression. @@ -986,9 +1123,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize); /*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. + * Create an internal DDict from dict buffer, to be used to decompress all future frames. + * The dictionary remains valid for all future frames, until explicitly invalidated, or + * a new dictionary is loaded. * @result : 0, or an error code (which can be tested with ZSTD_isError()). * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, * meaning "return to no-dictionary mode". @@ -1012,9 +1149,10 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s * The memory for the table is allocated on the first call to refDDict, and can be * freed with ZSTD_freeDCtx(). * + * If called with ZSTD_d_refMultipleDDicts disabled (the default), only one dictionary + * will be managed, and referencing a dictionary effectively "discards" any previous one. + * * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. * Special: referencing a NULL DDict means "return to no-dictionary mode". * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. */ @@ -1051,6 +1189,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + #endif /* ZSTD_H_235446 */ @@ -1066,29 +1205,12 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + /* This can be overridden externally to hide static symbols. */ #ifndef ZSTDLIB_STATIC_API #define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE #endif -/* Deprecation warnings : - * Should these warnings be a problem, it is generally possible to disable them, - * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. - * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. - */ -#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API /* disable deprecation warnings */ -#else -# if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message))) -# elif (__GNUC__ >= 3) -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated)) -# else -# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") -# define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API -# endif -#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ - /* ************************************************************************************** * experimental API (static linking only) **************************************************************************************** @@ -1123,6 +1245,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ #define ZSTD_STRATEGY_MIN ZSTD_fast #define ZSTD_STRATEGY_MAX ZSTD_btultra2 +#define ZSTD_BLOCKSIZE_MAX_MIN (1 << 10) /* The minimum valid max blocksize. Maximum blocksizes smaller than this make compressBound() inaccurate. */ #define ZSTD_OVERLAPLOG_MIN 0 @@ -1146,7 +1269,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) /* Advanced parameter bounds */ -#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX #define ZSTD_SRCSIZEHINT_MIN 0 #define ZSTD_SRCSIZEHINT_MAX INT_MAX @@ -1188,7 +1311,7 @@ typedef struct { * * Note: This field is optional. ZSTD_generateSequences() will calculate the value of * 'rep', but repeat offsets do not necessarily need to be calculated from an external - * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * sequence provider perspective. For example, ZSTD_compressSequences() does not * use this 'rep' field at all (as of now). */ } ZSTD_Sequence; @@ -1293,17 +1416,18 @@ typedef enum { } ZSTD_literalCompressionMode_e; typedef enum { - /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final - * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable - * or ZSTD_ps_disable allow for a force enable/disable the feature. + /* Note: This enum controls features which are conditionally beneficial. + * Zstd can take a decision on whether or not to enable the feature (ZSTD_ps_auto), + * but setting the switch to ZSTD_ps_enable or ZSTD_ps_disable force enable/disable the feature. */ ZSTD_ps_auto = 0, /* Let the library automatically determine whether the feature shall be enabled */ ZSTD_ps_enable = 1, /* Force-enable the feature */ ZSTD_ps_disable = 2 /* Do not use the feature */ -} ZSTD_paramSwitch_e; +} ZSTD_ParamSwitch_e; +#define ZSTD_paramSwitch_e ZSTD_ParamSwitch_e /* old name */ /* ************************************* -* Frame size functions +* Frame header and size functions ***************************************/ /*! ZSTD_findDecompressedSize() : @@ -1345,34 +1469,130 @@ ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); /*! ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * srcSize must be large enough, aka >= ZSTD_FRAMEHEADERSIZE_PREFIX. * @return : size of the Frame Header, * or an error code (if srcSize is too small) */ ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_FrameType_e; +#define ZSTD_frameType_e ZSTD_FrameType_e /* old name */ +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_FrameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; /* for ZSTD_skippableFrame, contains the skippable magic variant [0-15] */ + unsigned checksumFlag; + unsigned _reserved1; + unsigned _reserved2; +} ZSTD_FrameHeader; +#define ZSTD_frameHeader ZSTD_FrameHeader /* old name */ + +/*! ZSTD_getFrameHeader() : + * decode Frame Header into `zfhPtr`, or requires larger `srcSize`. + * @return : 0 => header is complete, `zfhPtr` is correctly filled, + * >0 => `srcSize` is too small, @return value is the wanted `srcSize` amount, `zfhPtr` is not filled, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize); +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_FrameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); + +/*! ZSTD_decompressionMargin() : + * Zstd supports in-place decompression, where the input and output buffers overlap. + * In this case, the output buffer must be at least (Margin + Output_Size) bytes large, + * and the input buffer must be at the end of the output buffer. + * + * _______________________ Output Buffer ________________________ + * | | + * | ____ Input Buffer ____| + * | | | + * v v v + * |---------------------------------------|-----------|----------| + * ^ ^ ^ + * |___________________ Output_Size ___________________|_ Margin _| + * + * NOTE: See also ZSTD_DECOMPRESSION_MARGIN(). + * NOTE: This applies only to single-pass decompression through ZSTD_decompress() or + * ZSTD_decompressDCtx(). + * NOTE: This function supports multi-frame input. + * + * @param src The compressed frame(s) + * @param srcSize The size of the compressed frame(s) + * @returns The decompression margin or an error that can be checked with ZSTD_isError(). + */ +ZSTDLIB_STATIC_API size_t ZSTD_decompressionMargin(const void* src, size_t srcSize); + +/*! ZSTD_DECOMPRESS_MARGIN() : + * Similar to ZSTD_decompressionMargin(), but instead of computing the margin from + * the compressed frame, compute it from the original size and the blockSizeLog. + * See ZSTD_decompressionMargin() for details. + * + * WARNING: This macro does not support multi-frame input, the input must be a single + * zstd frame. If you need that support use the function, or implement it yourself. + * + * @param originalSize The original uncompressed size of the data. + * @param blockSize The block size == MIN(windowSize, ZSTD_BLOCKSIZE_MAX). + * Unless you explicitly set the windowLog smaller than + * ZSTD_BLOCKSIZELOG_MAX you can just use ZSTD_BLOCKSIZE_MAX. + */ +#define ZSTD_DECOMPRESSION_MARGIN(originalSize, blockSize) ((size_t)( \ + ZSTD_FRAMEHEADERSIZE_MAX /* Frame header */ + \ + 4 /* checksum */ + \ + ((originalSize) == 0 ? 0 : 3 * (((originalSize) + (blockSize) - 1) / blockSize)) /* 3 bytes per block */ + \ + (blockSize) /* One block of margin */ \ + )) + typedef enum { - ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ - ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ -} ZSTD_sequenceFormat_e; + ZSTD_sf_noBlockDelimiters = 0, /* ZSTD_Sequence[] has no block delimiters, just sequences */ + ZSTD_sf_explicitBlockDelimiters = 1 /* ZSTD_Sequence[] contains explicit block delimiters */ +} ZSTD_SequenceFormat_e; +#define ZSTD_sequenceFormat_e ZSTD_SequenceFormat_e /* old name */ + +/*! ZSTD_sequenceBound() : + * `srcSize` : size of the input buffer + * @return : upper-bound for the number of sequences that can be generated + * from a buffer of srcSize bytes + * + * note : returns number of sequences - to get bytes, multiply by sizeof(ZSTD_Sequence). + */ +ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize); /*! ZSTD_generateSequences() : - * Generate sequences using ZSTD_compress2, given a source buffer. + * WARNING: This function is meant for debugging and informational purposes ONLY! + * Its implementation is flawed, and it will be deleted in a future version. + * It is not guaranteed to succeed, as there are several cases where it will give + * up and fail. You should NOT use this function in production code. + * + * This function is deprecated, and will be removed in a future version. + * + * Generate sequences using ZSTD_compress2(), given a source buffer. + * + * @param zc The compression context to be used for ZSTD_compress2(). Set any + * compression parameters you need on this context. + * @param outSeqs The output sequences buffer of size @p outSeqsSize + * @param outSeqsCapacity The size of the output sequences buffer. + * ZSTD_sequenceBound(srcSize) is an upper bound on the number + * of sequences that can be generated. + * @param src The source buffer to generate sequences from of size @p srcSize. + * @param srcSize The size of the source buffer. * * Each block will end with a dummy sequence * with offset == 0, matchLength == 0, and litLength == length of last literals. * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) * simply acts as a block delimiter. * - * zc can be used to insert custom compression params. - * This function invokes ZSTD_compress2 - * - * The output of this function can be fed into ZSTD_compressSequences() with CCtx - * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters - * @return : number of sequences generated + * @returns The number of sequences generated, necessarily less than + * ZSTD_sequenceBound(srcSize), or an error code that can be checked + * with ZSTD_isError(). */ - -ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); +ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()") +ZSTDLIB_STATIC_API size_t +ZSTD_generateSequences(ZSTD_CCtx* zc, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize); /*! ZSTD_mergeBlockDelimiters() : * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals @@ -1388,8 +1608,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* o ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); /*! ZSTD_compressSequences() : - * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. - * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * Compress an array of ZSTD_Sequence, associated with @src buffer, into dst. + * @src contains the entire input (not just the literals). + * If @srcSize > sum(sequence.length), the remaining bytes are considered all literals + * If a dictionary is included, then the cctx should reference the dict (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.). * The entire source is compressed into a single frame. * * The compression behavior changes based on cctx params. In particular: @@ -1398,11 +1620,17 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si * the block size derived from the cctx, and sequences may be split. This is the default setting. * * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain - * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * valid block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * When ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, it's possible to decide generating repcodes + * using the advanced parameter ZSTD_c_repcodeResolution. Repcodes will improve compression ratio, though the benefit + * can vary greatly depending on Sequences. On the other hand, repcode resolution is an expensive operation. + * By default, it's disabled at low (<10) compression levels, and enabled above the threshold (>=10). + * ZSTD_c_repcodeResolution makes it possible to directly manage this processing in either direction. * - * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined - * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for - * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * If ZSTD_c_validateSequences == 0, this function blindly accepts the Sequences provided. Invalid Sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then the function will detect invalid Sequences (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) and then bail out and return an error. * * In addition to the two adjustable experimental params, there are other important cctx params. * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. @@ -1410,14 +1638,42 @@ ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, si * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md * - * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. - * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, - * and cannot emit an RLE block that disagrees with the repcode history - * @return : final compressed size or a ZSTD error. - */ -ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, - const ZSTD_Sequence* inSeqs, size_t inSeqsSize, - const void* src, size_t srcSize); + * Note: Repcodes are, as of now, always re-calculated within this function, ZSTD_Sequence.rep is effectively unused. + * Dev Note: Once ability to ingest repcodes become available, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history. + * @return : final compressed size, or a ZSTD error code. + */ +ZSTDLIB_STATIC_API size_t +ZSTD_compressSequences(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_compressSequencesAndLiterals() : + * This is a variant of ZSTD_compressSequences() which, + * instead of receiving (src,srcSize) as input parameter, receives (literals,litSize), + * aka all the literals, already extracted and laid out into a single continuous buffer. + * This can be useful if the process generating the sequences also happens to generate the buffer of literals, + * thus skipping an extraction + caching stage. + * It's a speed optimization, useful when the right conditions are met, + * but it also features the following limitations: + * - Only supports explicit delimiter mode + * - Currently does not support Sequences validation (so input Sequences are trusted) + * - Not compatible with frame checksum, which must be disabled + * - If any block is incompressible, will fail and return an error + * - @litSize must be == sum of all @.litLength fields in @inSeqs. Any discrepancy will generate an error. + * - @litBufCapacity is the size of the underlying buffer into which literals are written, starting at address @literals. + * @litBufCapacity must be at least 8 bytes larger than @litSize. + * - @decompressedSize must be correct, and correspond to the sum of all Sequences. Any discrepancy will generate an error. + * @return : final compressed size, or a ZSTD error code. + */ +ZSTDLIB_STATIC_API size_t +ZSTD_compressSequencesAndLiterals(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t nbSequences, + const void* literals, size_t litSize, size_t litBufCapacity, + size_t decompressedSize); /*! ZSTD_writeSkippableFrame() : @@ -1425,8 +1681,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* ds * * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number, * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. - * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so - * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, + * so the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. * * Returns an error if destination buffer is not large enough, if the source size is not representable * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). @@ -1434,26 +1690,28 @@ ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* ds * @return : number of bytes written or a ZSTD error. */ ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, - const void* src, size_t srcSize, unsigned magicVariant); + const void* src, size_t srcSize, + unsigned magicVariant); /*! ZSTD_readSkippableFrame() : - * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer. + * Retrieves the content of a zstd skippable frame starting at @src, and writes it to @dst buffer. * - * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written, - * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. This can be NULL if the caller is not interested - * in the magicVariant. + * The parameter @magicVariant will receive the magicVariant that was supplied when the frame was written, + * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START. + * This can be NULL if the caller is not interested in the magicVariant. * * Returns an error if destination buffer is not large enough, or if the frame is not skippable. * * @return : number of bytes written or a ZSTD error. */ -ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant, - const void* src, size_t srcSize); +ZSTDLIB_STATIC_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, + unsigned* magicVariant, + const void* src, size_t srcSize); /*! ZSTD_isSkippableFrame() : * Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame. */ -ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); +ZSTDLIB_STATIC_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); @@ -1464,48 +1722,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size); /*! ZSTD_estimate*() : * These functions make it possible to estimate memory usage * of a future {D,C}Ctx, before its creation. + * This is useful in combination with ZSTD_initStatic(), + * which makes it possible to employ a static buffer for ZSTD_CCtx* state. * * ZSTD_estimateCCtxSize() will provide a memory budget large enough - * for any compression level up to selected one. - * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate - * does not include space for a window buffer. - * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2() + * associated with any compression level up to max specified one. * The estimate will assume the input may be arbitrarily large, * which is the worst case. * + * Note that the size estimation is specific for one-shot compression, + * it is not valid for streaming (see ZSTD_estimateCStreamSize*()) + * nor other potential ways of using a ZSTD_CCtx* state. + * * When srcSize can be bound by a known and rather "small" value, - * this fact can be used to provide a tighter estimation - * because the CCtx compression context will need less memory. - * This tighter estimation can be provided by more advanced functions + * this knowledge can be used to provide a tighter budget estimation + * because the ZSTD_CCtx* state will need less memory for small inputs. + * This tighter estimation can be provided by employing more advanced functions * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. * - * Note 2 : only single-threaded compression is supported. + * Note : only single-threaded compression is supported. * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. */ -ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void); /*! ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. + * ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression + * using any compression level up to the max specified one. + * It will also consider src size to be arbitrarily "large", which is a worst case scenario. * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. + * ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time. + * Size estimates assume that no external sequence producer is registered. + * + * ZSTD_DStream memory budget depends on frame's window Size. * This information can be passed manually, using ZSTD_estimateDStreamSize, * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Any frame requesting a window size larger than max specified one will be rejected. * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), * an internal ?Dict will be created, which additional size is not estimated here. - * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel); + * In this case, get total size by adding ZSTD_estimate?DictSize + */ +ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize); ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); /*! ZSTD_estimate?DictSize() : @@ -1568,7 +1837,15 @@ typedef void (*ZSTD_freeFunction) (void* opaque, void* address); typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; static __attribute__((__unused__)) + +#if defined(__clang__) && __clang_major__ >= 5 +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /*< this constant defers to stdlib's functions */ +#if defined(__clang__) && __clang_major__ >= 5 +#pragma clang diagnostic pop +#endif ZSTDLIB_STATIC_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); @@ -1649,22 +1926,45 @@ ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); * This function never fails (wide contract) */ ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/*! ZSTD_CCtx_setCParams() : + * Set all parameters provided within @p cparams into the working @p cctx. + * Note : if modifying parameters during compression (MT mode only), + * note that changes to the .windowLog parameter will be ignored. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). + * On failure, no parameters are updated. + */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams); + +/*! ZSTD_CCtx_setFParams() : + * Set all parameters provided within @p fparams into the working @p cctx. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). + */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams); + +/*! ZSTD_CCtx_setParams() : + * Set all parameters provided within @p params into the working @p cctx. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()). + */ +ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params); + /*! ZSTD_compress_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_compress2") +ZSTDLIB_STATIC_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); /*! ZSTD_compress_usingCDict_advanced() : * Note : this function is now DEPRECATED. * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +ZSTDLIB_STATIC_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1725,7 +2025,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * See the comments on that enum for an explanation of the feature. */ #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 -/* Controlled with ZSTD_paramSwitch_e enum. +/* Controlled with ZSTD_ParamSwitch_e enum. * Default is ZSTD_ps_auto. * Set to ZSTD_ps_disable to never compress literals. * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals @@ -1737,11 +2037,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 -/* Tries to fit compressed block size to be around targetCBlockSize. - * No target when targetCBlockSize == 0. - * There is no guarantee on compressed block size (default:0) */ -#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 - /* User's best guess of source size. * Hint is not valid when srcSizeHint == 0. * There is no guarantee that hint is close to actual source size, @@ -1808,13 +2103,16 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * Experimental parameter. * Default is 0 == disabled. Set to 1 to enable. * - * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same - * between calls, except for the modifications that zstd makes to pos (the - * caller must not modify pos). This is checked by the compressor, and - * compression will fail if it ever changes. This means the only flush - * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end - * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) - * MUST not be modified during compression or you will get data corruption. + * Tells the compressor that input data presented with ZSTD_inBuffer + * will ALWAYS be the same between calls. + * Technically, the @src pointer must never be changed, + * and the @pos field can only be updated by zstd. + * However, it's possible to increase the @size field, + * allowing scenarios where more data can be appended after compressions starts. + * These conditions are checked by the compressor, + * and compression will fail if they are not respected. + * Also, data in the ZSTD_inBuffer within the range [src, src + pos) + * MUST not be modified during compression or it will result in data corruption. * * When this flag is enabled zstd won't allocate an input window buffer, * because the user guarantees it can reference the ZSTD_inBuffer until @@ -1822,18 +2120,15 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also * avoid the memcpy() from the input buffer to the input window buffer. * - * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. - * That means this flag cannot be used with ZSTD_compressStream(). - * * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using * this flag is ALWAYS memory safe, and will never access out-of-bounds - * memory. However, compression WILL fail if you violate the preconditions. + * memory. However, compression WILL fail if conditions are not respected. * - * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST - * not be modified during compression or you will get data corruption. This - * is because zstd needs to reference data in the ZSTD_inBuffer to find + * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST + * not be modified during compression or it will result in data corruption. + * This is because zstd needs to reference data in the ZSTD_inBuffer to find * matches. Normally zstd maintains its own window buffer for this purpose, - * but passing this flag tells zstd to use the user provided buffer. + * but passing this flag tells zstd to rely on user provided buffer instead. */ #define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 @@ -1871,22 +2166,46 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo /* ZSTD_c_validateSequences * Default is 0 == disabled. Set to 1 to enable sequence validation. * - * For use with sequence compression API: ZSTD_compressSequences(). - * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * For use with sequence compression API: ZSTD_compressSequences*(). + * Designates whether or not provided sequences are validated within ZSTD_compressSequences*() * during function execution. * - * Without validation, providing a sequence that does not conform to the zstd spec will cause - * undefined behavior, and may produce a corrupted block. + * When Sequence validation is disabled (default), Sequences are compressed as-is, + * so they must correct, otherwise it would result in a corruption error. * - * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for + * Sequence validation adds some protection, by ensuring that all values respect boundary conditions. + * If a Sequence is detected invalid (see doc/zstd_compression_format.md for * specifics regarding offset/matchlength requirements) then the function will bail out and * return an error. - * */ #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 -/* ZSTD_c_useBlockSplitter - * Controlled with ZSTD_paramSwitch_e enum. +/* ZSTD_c_blockSplitterLevel + * note: this parameter only influences the first splitter stage, + * which is active before producing the sequences. + * ZSTD_c_splitAfterSequences controls the next splitter stage, + * which is active after sequence production. + * Note that both can be combined. + * Allowed values are between 0 and ZSTD_BLOCKSPLITTER_LEVEL_MAX included. + * 0 means "auto", which will select a value depending on current ZSTD_c_strategy. + * 1 means no splitting. + * Then, values from 2 to 6 are sorted in increasing cpu load order. + * + * Note that currently the first block is never split, + * to ensure expansion guarantees in presence of incompressible data. + */ +#define ZSTD_BLOCKSPLITTER_LEVEL_MAX 6 +#define ZSTD_c_blockSplitterLevel ZSTD_c_experimentalParam20 + +/* ZSTD_c_splitAfterSequences + * This is a stronger splitter algorithm, + * based on actual sequences previously produced by the selected parser. + * It's also slower, and as a consequence, mostly used for high compression levels. + * While the post-splitter does overlap with the pre-splitter, + * both can nonetheless be combined, + * notably with ZSTD_c_blockSplitterLevel at ZSTD_BLOCKSPLITTER_LEVEL_MAX, + * resulting in higher compression ratio than just one of them. + * * Default is ZSTD_ps_auto. * Set to ZSTD_ps_disable to never use block splitter. * Set to ZSTD_ps_enable to always use block splitter. @@ -1894,10 +2213,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use * block splitting based on the compression parameters. */ -#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13 +#define ZSTD_c_splitAfterSequences ZSTD_c_experimentalParam13 /* ZSTD_c_useRowMatchFinder - * Controlled with ZSTD_paramSwitch_e enum. + * Controlled with ZSTD_ParamSwitch_e enum. * Default is ZSTD_ps_auto. * Set to ZSTD_ps_disable to never use row-based matchfinder. * Set to ZSTD_ps_enable to force usage of row-based matchfinder. @@ -1928,6 +2247,80 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo */ #define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 +/* ZSTD_c_prefetchCDictTables + * Controlled with ZSTD_ParamSwitch_e enum. Default is ZSTD_ps_auto. + * + * In some situations, zstd uses CDict tables in-place rather than copying them + * into the working context. (See docs on ZSTD_dictAttachPref_e above for details). + * In such situations, compression speed is seriously impacted when CDict tables are + * "cold" (outside CPU cache). This parameter instructs zstd to prefetch CDict tables + * when they are used in-place. + * + * For sufficiently small inputs, the cost of the prefetch will outweigh the benefit. + * For sufficiently large inputs, zstd will by default memcpy() CDict tables + * into the working context, so there is no need to prefetch. This parameter is + * targeted at a middle range of input sizes, where a prefetch is cheap enough to be + * useful but memcpy() is too expensive. The exact range of input sizes where this + * makes sense is best determined by careful experimentation. + * + * Note: for this parameter, ZSTD_ps_auto is currently equivalent to ZSTD_ps_disable, + * but in the future zstd may conditionally enable this feature via an auto-detection + * heuristic for cold CDicts. + * Use ZSTD_ps_disable to opt out of prefetching under any circumstances. + */ +#define ZSTD_c_prefetchCDictTables ZSTD_c_experimentalParam16 + +/* ZSTD_c_enableSeqProducerFallback + * Allowed values are 0 (disable) and 1 (enable). The default setting is 0. + * + * Controls whether zstd will fall back to an internal sequence producer if an + * external sequence producer is registered and returns an error code. This fallback + * is block-by-block: the internal sequence producer will only be called for blocks + * where the external sequence producer returns an error code. Fallback parsing will + * follow any other cParam settings, such as compression level, the same as in a + * normal (fully-internal) compression operation. + * + * The user is strongly encouraged to read the full Block-Level Sequence Producer API + * documentation (below) before setting this parameter. */ +#define ZSTD_c_enableSeqProducerFallback ZSTD_c_experimentalParam17 + +/* ZSTD_c_maxBlockSize + * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). + * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. + * + * This parameter can be used to set an upper bound on the blocksize + * that overrides the default ZSTD_BLOCKSIZE_MAX. It cannot be used to set upper + * bounds greater than ZSTD_BLOCKSIZE_MAX or bounds lower than 1KB (will make + * compressBound() inaccurate). Only currently meant to be used for testing. + */ +#define ZSTD_c_maxBlockSize ZSTD_c_experimentalParam18 + +/* ZSTD_c_repcodeResolution + * This parameter only has an effect if ZSTD_c_blockDelimiters is + * set to ZSTD_sf_explicitBlockDelimiters (may change in the future). + * + * This parameter affects how zstd parses external sequences, + * provided via the ZSTD_compressSequences*() API + * or from an external block-level sequence producer. + * + * If set to ZSTD_ps_enable, the library will check for repeated offsets within + * external sequences, even if those repcodes are not explicitly indicated in + * the "rep" field. Note that this is the only way to exploit repcode matches + * while using compressSequences*() or an external sequence producer, since zstd + * currently ignores the "rep" field of external sequences. + * + * If set to ZSTD_ps_disable, the library will not exploit repeated offsets in + * external sequences, regardless of whether the "rep" field has been set. This + * reduces sequence compression overhead by about 25% while sacrificing some + * compression ratio. + * + * The default value is ZSTD_ps_auto, for which the library will enable/disable + * based on compression level (currently: level<10 disables, level>=10 enables). + */ +#define ZSTD_c_repcodeResolution ZSTD_c_experimentalParam19 +#define ZSTD_c_searchForExternalRepcodes ZSTD_c_experimentalParam19 /* older name */ + + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -2084,7 +2477,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete * in the range [dst, dst + pos) MUST not be modified during decompression * or you will get data corruption. * - * When this flags is enabled zstd won't allocate an output buffer, because + * When this flag is enabled zstd won't allocate an output buffer, because * it can write directly to the ZSTD_outBuffer, but it will still allocate * an input buffer large enough to fit any compressed block. This will also * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. @@ -2137,6 +2530,33 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete */ #define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 +/* ZSTD_d_disableHuffmanAssembly + * Set to 1 to disable the Huffman assembly implementation. + * The default value is 0, which allows zstd to use the Huffman assembly + * implementation if available. + * + * This parameter can be used to disable Huffman assembly at runtime. + * If you want to disable it at compile time you can define the macro + * ZSTD_DISABLE_ASM. + */ +#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5 + +/* ZSTD_d_maxBlockSize + * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB). + * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default. + * + * Forces the decompressor to reject blocks whose content size is + * larger than the configured maxBlockSize. When maxBlockSize is + * larger than the windowSize, the windowSize is used instead. + * This saves memory on the decoder when you know all blocks are small. + * + * This option is typically used in conjunction with ZSTD_c_maxBlockSize. + * + * WARNING: This causes the decoder to reject otherwise valid frames + * that have block sizes larger than the configured maxBlockSize. + */ +#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6 + /*! ZSTD_DCtx_setFormat() : * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). @@ -2145,6 +2565,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete * such ZSTD_f_zstd1_magicless for example. * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); /*! ZSTD_decompressStream_simpleArgs() : @@ -2181,6 +2602,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs ( * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); @@ -2198,17 +2620,15 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /*! ZSTD_initCStream_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd parameter and leave the rest as-is. - * for ((param, value) : params) { - * ZSTD_CCtx_setParameter(zcs, param, value); - * } + * ZSTD_CCtx_setParams(zcs, params); * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); * @@ -2218,6 +2638,7 @@ size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, ZSTD_parameters params, @@ -2232,15 +2653,13 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /*! ZSTD_initCStream_usingCDict_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: + * This function is DEPRECATED, and is equivalent to: * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. - * for ((fParam, value) : fParams) { - * ZSTD_CCtx_setParameter(zcs, fParam, value); - * } + * ZSTD_CCtx_setFParams(zcs, fParams); * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); * ZSTD_CCtx_refCDict(zcs, cdict); * @@ -2250,6 +2669,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, @@ -2264,7 +2684,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, * explicitly specified. * * start a new frame, using same parameters from previous frame. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * This is typically useful to skip dictionary loading stage, since it will reuse it in-place. * Note that zcs must be init at least once before using ZSTD_resetCStream(). * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. @@ -2274,6 +2694,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, * This prototype will generate compilation warnings. */ ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +ZSTDLIB_STATIC_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); @@ -2319,8 +2740,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); * * note: no dictionary will be used if dict == NULL or dictSize < 8 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_loadDictionary, see zstd.h for detailed instructions") ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /*! @@ -2330,8 +2751,8 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const vo * ZSTD_DCtx_refDDict(zds, ddict); * * note : ddict is referenced, it must outlive decompression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset + ZSTD_DCtx_refDDict, see zstd.h for detailed instructions") ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /*! @@ -2339,18 +2760,202 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z * * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * - * re-use decompression parameters from previous init; saves dictionary loading - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + * reuse decompression parameters from previous init; saves dictionary loading */ +ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions") ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); +/* ********************* BLOCK-LEVEL SEQUENCE PRODUCER API ********************* + * + * *** OVERVIEW *** + * The Block-Level Sequence Producer API allows users to provide their own custom + * sequence producer which libzstd invokes to process each block. The produced list + * of sequences (literals and matches) is then post-processed by libzstd to produce + * valid compressed blocks. + * + * This block-level offload API is a more granular complement of the existing + * frame-level offload API compressSequences() (introduced in v1.5.1). It offers + * an easier migration story for applications already integrated with libzstd: the + * user application continues to invoke the same compression functions + * ZSTD_compress2() or ZSTD_compressStream2() as usual, and transparently benefits + * from the specific advantages of the external sequence producer. For example, + * the sequence producer could be tuned to take advantage of known characteristics + * of the input, to offer better speed / ratio, or could leverage hardware + * acceleration not available within libzstd itself. + * + * See contrib/externalSequenceProducer for an example program employing the + * Block-Level Sequence Producer API. + * + * *** USAGE *** + * The user is responsible for implementing a function of type + * ZSTD_sequenceProducer_F. For each block, zstd will pass the following + * arguments to the user-provided function: + * + * - sequenceProducerState: a pointer to a user-managed state for the sequence + * producer. + * + * - outSeqs, outSeqsCapacity: an output buffer for the sequence producer. + * outSeqsCapacity is guaranteed >= ZSTD_sequenceBound(srcSize). The memory + * backing outSeqs is managed by the CCtx. + * + * - src, srcSize: an input buffer for the sequence producer to parse. + * srcSize is guaranteed to be <= ZSTD_BLOCKSIZE_MAX. + * + * - dict, dictSize: a history buffer, which may be empty, which the sequence + * producer may reference as it parses the src buffer. Currently, zstd will + * always pass dictSize == 0 into external sequence producers, but this will + * change in the future. + * + * - compressionLevel: a signed integer representing the zstd compression level + * set by the user for the current operation. The sequence producer may choose + * to use this information to change its compression strategy and speed/ratio + * tradeoff. Note: the compression level does not reflect zstd parameters set + * through the advanced API. + * + * - windowSize: a size_t representing the maximum allowed offset for external + * sequences. Note that sequence offsets are sometimes allowed to exceed the + * windowSize if a dictionary is present, see doc/zstd_compression_format.md + * for details. + * + * The user-provided function shall return a size_t representing the number of + * sequences written to outSeqs. This return value will be treated as an error + * code if it is greater than outSeqsCapacity. The return value must be non-zero + * if srcSize is non-zero. The ZSTD_SEQUENCE_PRODUCER_ERROR macro is provided + * for convenience, but any value greater than outSeqsCapacity will be treated as + * an error code. + * + * If the user-provided function does not return an error code, the sequences + * written to outSeqs must be a valid parse of the src buffer. Data corruption may + * occur if the parse is not valid. A parse is defined to be valid if the + * following conditions hold: + * - The sum of matchLengths and literalLengths must equal srcSize. + * - All sequences in the parse, except for the final sequence, must have + * matchLength >= ZSTD_MINMATCH_MIN. The final sequence must have + * matchLength >= ZSTD_MINMATCH_MIN or matchLength == 0. + * - All offsets must respect the windowSize parameter as specified in + * doc/zstd_compression_format.md. + * - If the final sequence has matchLength == 0, it must also have offset == 0. + * + * zstd will only validate these conditions (and fail compression if they do not + * hold) if the ZSTD_c_validateSequences cParam is enabled. Note that sequence + * validation has a performance cost. + * + * If the user-provided function returns an error, zstd will either fall back + * to an internal sequence producer or fail the compression operation. The user can + * choose between the two behaviors by setting the ZSTD_c_enableSeqProducerFallback + * cParam. Fallback compression will follow any other cParam settings, such as + * compression level, the same as in a normal compression operation. + * + * The user shall instruct zstd to use a particular ZSTD_sequenceProducer_F + * function by calling + * ZSTD_registerSequenceProducer(cctx, + * sequenceProducerState, + * sequenceProducer) + * This setting will persist until the next parameter reset of the CCtx. + * + * The sequenceProducerState must be initialized by the user before calling + * ZSTD_registerSequenceProducer(). The user is responsible for destroying the + * sequenceProducerState. + * + * *** LIMITATIONS *** + * This API is compatible with all zstd compression APIs which respect advanced parameters. + * However, there are three limitations: + * + * First, the ZSTD_c_enableLongDistanceMatching cParam is not currently supported. + * COMPRESSION WILL FAIL if it is enabled and the user tries to compress with a block-level + * external sequence producer. + * - Note that ZSTD_c_enableLongDistanceMatching is auto-enabled by default in some + * cases (see its documentation for details). Users must explicitly set + * ZSTD_c_enableLongDistanceMatching to ZSTD_ps_disable in such cases if an external + * sequence producer is registered. + * - As of this writing, ZSTD_c_enableLongDistanceMatching is disabled by default + * whenever ZSTD_c_windowLog < 128MB, but that's subject to change. Users should + * check the docs on ZSTD_c_enableLongDistanceMatching whenever the Block-Level Sequence + * Producer API is used in conjunction with advanced settings (like ZSTD_c_windowLog). + * + * Second, history buffers are not currently supported. Concretely, zstd will always pass + * dictSize == 0 to the external sequence producer (for now). This has two implications: + * - Dictionaries are not currently supported. Compression will *not* fail if the user + * references a dictionary, but the dictionary won't have any effect. + * - Stream history is not currently supported. All advanced compression APIs, including + * streaming APIs, work with external sequence producers, but each block is treated as + * an independent chunk without history from previous blocks. + * + * Third, multi-threading within a single compression is not currently supported. In other words, + * COMPRESSION WILL FAIL if ZSTD_c_nbWorkers > 0 and an external sequence producer is registered. + * Multi-threading across compressions is fine: simply create one CCtx per thread. + * + * Long-term, we plan to overcome all three limitations. There is no technical blocker to + * overcoming them. It is purely a question of engineering effort. + */ + +#define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1)) + +typedef size_t (*ZSTD_sequenceProducer_F) ( + void* sequenceProducerState, + ZSTD_Sequence* outSeqs, size_t outSeqsCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel, + size_t windowSize +); + +/*! ZSTD_registerSequenceProducer() : + * Instruct zstd to use a block-level external sequence producer function. + * + * The sequenceProducerState must be initialized by the caller, and the caller is + * responsible for managing its lifetime. This parameter is sticky across + * compressions. It will remain set until the user explicitly resets compression + * parameters. + * + * Sequence producer registration is considered to be an "advanced parameter", + * part of the "advanced API". This means it will only have an effect on compression + * APIs which respect advanced parameters, such as compress2() and compressStream2(). + * Older compression APIs such as compressCCtx(), which predate the introduction of + * "advanced parameters", will ignore any external sequence producer setting. + * + * The sequence producer can be "cleared" by registering a NULL function pointer. This + * removes all limitations described above in the "LIMITATIONS" section of the API docs. + * + * The user is strongly encouraged to read the full API documentation (above) before + * calling this function. */ +ZSTDLIB_STATIC_API void +ZSTD_registerSequenceProducer( + ZSTD_CCtx* cctx, + void* sequenceProducerState, + ZSTD_sequenceProducer_F sequenceProducer +); + +/*! ZSTD_CCtxParams_registerSequenceProducer() : + * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params. + * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(), + * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx(). + * + * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx() + * is required, then this function is for you. Otherwise, you probably don't need it. + * + * See tests/zstreamtest.c for example usage. */ +ZSTDLIB_STATIC_API void +ZSTD_CCtxParams_registerSequenceProducer( + ZSTD_CCtx_params* params, + void* sequenceProducerState, + ZSTD_sequenceProducer_F sequenceProducer +); + + /* ******************************************************************* -* Buffer-less and synchronous inner streaming functions +* Buffer-less and synchronous inner streaming functions (DEPRECATED) +* +* This API is deprecated, and will be removed in a future version. +* It allows streaming (de)compression with user allocated buffers. +* However, it is hard to use, and not as well tested as the rest of +* our API. * -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with several restrictions, documented below. -* Prefer normal streaming API for an easier experience. +* Please use the normal streaming API instead: ZSTD_compressStream2, +* and ZSTD_decompressStream. +* If there is functionality that you need, but it doesn't provide, +* please open an issue on our GitHub. ********************************************************************* */ /* @@ -2358,11 +2963,10 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); A ZSTD_CCtx object is required to track streaming operations. Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be re-used multiple times within successive compression operations. + ZSTD_CCtx object can be reused multiple times within successive compression operations. Start by initializing a context. Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() Then, consume your input using ZSTD_compressContinue(). There are some important considerations to keep in mind when using this advanced function : @@ -2380,39 +2984,49 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. + `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again. */ /*===== Buffer-less streaming compression functions =====*/ +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */ -ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("This function will likely be removed in a future release. It is misleading and has very limited utility.") +ZSTDLIB_STATIC_API +size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTD_DEPRECATED("The buffer-less API is deprecated in favor of the normal streaming API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); /* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ ZSTD_DEPRECATED("use advanced API to access custom parameters") +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ ZSTD_DEPRECATED("use advanced API to access custom parameters") +ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ /* Buffer-less streaming decompression (synchronous mode) A ZSTD_DCtx object is required to track streaming operations. Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. + A ZSTD_DCtx object can be reused multiple times. First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. Data fragment must be large enough to ensure successful decoding. `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least result bytes on next attempt. errorCode, which can be tested using ZSTD_isError(). - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + It fills a ZSTD_FrameHeader structure with important information to correctly decode the frame, such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. As a consequence, check that values remain within valid application range. @@ -2428,7 +3042,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ The most memory efficient way is to use a round buffer of sufficient size. Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). + which can return an error code if required value is too large for current system (in 32-bits mode). In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, up to the moment there is not enough room left in the buffer to guarantee decoding another full block, which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. @@ -2448,7 +3062,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. It can also be an error code, which can be tested with ZSTD_isError(). @@ -2471,27 +3085,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_ */ /*===== Buffer-less streaming decompression functions =====*/ -typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -typedef struct { - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ - unsigned blockSizeMax; - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameHeader; -/*! ZSTD_getFrameHeader() : - * decode Frame Header, or requires larger `srcSize`. - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /*< doesn't consume input */ -/*! ZSTD_getFrameHeader_advanced() : - * same as ZSTD_getFrameHeader(), - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); @@ -2502,6 +3096,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); /* misc */ +ZSTD_DEPRECATED("This function will likely be removed in the next minor release. It is misleading and has very limited utility.") ZSTDLIB_STATIC_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); @@ -2509,11 +3104,23 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); -/* ============================ */ -/* Block level API */ -/* ============================ */ +/* ========================================= */ +/* Block level API (DEPRECATED) */ +/* ========================================= */ /*! + + This API is deprecated in favor of the regular compression API. + You can get the frame header down to 2 bytes by setting: + - ZSTD_c_format = ZSTD_f_zstd1_magicless + - ZSTD_c_contentSizeFlag = 0 + - ZSTD_c_checksumFlag = 0 + - ZSTD_c_dictIDFlag = 0 + + This API is not as well tested as our normal API, so we recommend not using it. + We will be removing it in a future version. If the normal API doesn't provide + the functionality you need, please open a GitHub issue. + Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. @@ -2524,7 +3131,6 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - It is necessary to init context before starting + compression : any ZSTD_compressBegin*() variant, including with dictionary + decompression : any ZSTD_decompressBegin*() variant, including with dictionary - + copyCCtx() and copyDCtx() can be used too - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + If input is larger than a block size, it's necessary to split input data into multiple blocks + For inputs larger than a single block, consider using regular ZSTD_compress() instead. @@ -2541,11 +3147,14 @@ ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); */ /*===== Raw zstd block functions =====*/ +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTD_DEPRECATED("The block API is deprecated in favor of the normal compression API. See docs.") ZSTDLIB_STATIC_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ - diff --git a/include/linux/zswap.h b/include/linux/zswap.h index d961ead91bf1..30c193a1207e 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -26,7 +26,7 @@ struct zswap_lruvec_state { unsigned long zswap_total_pages(void); bool zswap_store(struct folio *folio); -bool zswap_load(struct folio *folio); +int zswap_load(struct folio *folio); void zswap_invalidate(swp_entry_t swp); int zswap_swapon(int type, unsigned long nr_pages); void zswap_swapoff(int type); @@ -44,9 +44,9 @@ static inline bool zswap_store(struct folio *folio) return false; } -static inline bool zswap_load(struct folio *folio) +static inline int zswap_load(struct folio *folio) { - return false; + return -ENOENT; } static inline void zswap_invalidate(swp_entry_t swp) {} |